File Coverage

blib/lib/Eutf2.pm

Criterion	Covered	Total	%
statement	1005	3114	32.2
branch	1071	2704	39.6
condition	123	373	32.9
subroutine	70	125	56.0
pod	7	74	9.4
total	2276	6390	35.6

line	stmt	bran	cond	sub	pod	time	code
1							package Eutf2;
2							######################################################################
3							#
4							# Eutf2 - Run-time routines for UTF2.pm
5							#
6							# http://search.cpan.org/dist/Char-UTF2/
7							#
8							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 INABA Hitoshi
9							######################################################################
10
11	302			302		4106	use 5.00503; # Galapagos Consensus 1998 for primetools
	302					680
12							# use 5.008001; # Lancaster Consensus 2013 for toolchains
13
14							# 12.3. Delaying use Until Runtime
15							# in Chapter 12. Packages, Libraries, and Modules
16							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
17							# (and so on)
18
19							# Version numbers should be boring
20							# http://www.dagolden.com/index.php/369/version-numbers-should-be-boring/
21							# For the impatient, the disinterested or those who just want to follow
22							# a recipe, my advice for all modules is this:
23							# our $VERSION = "0.001"; # or "0.001_001" for a dev release
24							# $VERSION = eval $VERSION;
25
26	302			302		15215	BEGIN { CORE::eval q{ use vars qw($VERSION) } }
	302			302		1125
	302					364
	302					33514
27							$VERSION = '1.08';
28							$VERSION = CORE::eval $VERSION;
29
30							BEGIN {
31	302	50		302		1408	if ($^X =~ / jperl /oxmsi) {
32	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)\n";
33							}
34	302					303	if (CORE::ord('A') == 193) {
35							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).\n";
36							}
37	302					29593	if (CORE::ord('A') != 0x41) {
38							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).\n";
39							}
40							}
41
42							BEGIN {
43
44							# instead of utf8.pm
45	302			302		14912	CORE::eval q{
	302			302		1181
	302			103		350
	302					30245
	82					5832
	78					5543
	72					5166
	70					4888
46							no warnings qw(redefine);
47							*utf8::upgrade = sub { CORE::length $_[0] };
48							*utf8::downgrade = sub { 1 };
49							*utf8::encode = sub { };
50							*utf8::decode = sub { 1 };
51							*utf8::is_utf8 = sub { };
52							*utf8::valid = sub { 1 };
53							};
54	302	50				121714	if ($@) {
55	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
56	0					0	*utf8::downgrade = sub { 1 };
	0					0
57	0					0	*utf8::encode = sub { };
58	0					0	*utf8::decode = sub { 1 };
	0					0
59	0					0	*utf8::is_utf8 = sub { };
60	0					0	*utf8::valid = sub { 1 };
	0					0
61							}
62							}
63
64							# instead of Symbol.pm
65	0					0	BEGIN {
66	302			302		585	my $genpkg = "Symbol::";
67	302					10159	my $genseq = 0;
68
69							sub gensym () {
70	0			0	0	0	my $name = "GEN" . $genseq++;
71
72							# here, no strict qw(refs); if strict.pm exists
73
74	0					0	my $ref = \*{$genpkg . $name};
	0					0
75	0					0	delete $$genpkg{$name};
76	0					0	return $ref;
77							}
78
79							sub qualify ($;$) {
80	0			0	0	0	my ($name) = @_;
81	0	0	0			0	if (!ref($name) && (Eutf2::index($name, '::') == -1) && (Eutf2::index($name, "'") == -1)) {
			0
82	0					0	my $pkg;
83	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
84
85							# Global names: special character, "^xyz", or other.
86	0	0	0			0	if ($name =~ /^(([^\x80-\xFFa-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
87							# RGS 2001-11-05 : translate leading ^X to control-char
88	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
89	0					0	$pkg = "main";
90							}
91							else {
92	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
93							}
94	0					0	$name = $pkg . "::" . $name;
95							}
96	0					0	return $name;
97							}
98
99							sub qualify_to_ref ($;$) {
100
101							# here, no strict qw(refs); if strict.pm exists
102
103	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
104							}
105							}
106
107							# Column: local $@
108							# in Chapter 9. Osaete okitai Perl no kiso
109							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
110							# (and so on)
111
112							# use strict; if strict.pm exists
113							BEGIN {
114	302	50		302		440	if (CORE::eval { local $@; CORE::require strict }) {
	302					529
	302					2413
115	302					25064	strict::->import;
116							}
117							}
118
119							# P.714 29.2.39. flock
120							# in Chapter 29: Functions
121							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
122
123							# P.863 flock
124							# in Chapter 27: Functions
125							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
126
127							sub LOCK_SH() {1}
128							sub LOCK_EX() {2}
129							sub LOCK_UN() {8}
130							sub LOCK_NB() {4}
131
132							# instead of Carp.pm
133							sub carp;
134							sub croak;
135							sub cluck;
136							sub confess;
137
138							# 6.18. Matching Multiple-Byte Characters
139							# in Chapter 6. Pattern Matching
140							# of ISBN 978-1-56592-243-3 Perl Perl Cookbook.
141							# (and so on)
142
143							# regexp of character
144	302			302		16099	BEGIN { CORE::eval q{ use vars qw($your_char) } } $your_char = q{(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]\|[\x00-\x7F\xF5-\xFF]};
	302			302		1160
	302					347
	302					14120
145	302			302		13869	BEGIN { CORE::eval q{ use vars qw($qq_char ) } } $qq_char = qr/\\c[\x40-\x5F]\|\\?(?:$your_char)/oxms;
	302			302		1095
	302					347
	302					14176
146	302			302		13163	BEGIN { CORE::eval q{ use vars qw($q_char ) } } $q_char = qr/$your_char/oxms;
	302			302		1083
	302					340
	302					15760
147
148							#
149							# UTF-8 character range per length
150							#
151							my %range_tr = ();
152
153							#
154							# alias of encoding name
155							#
156	302			302		13604	BEGIN { CORE::eval q{ use vars qw($encoding_alias) } }
	302			302		1043
	302					333
	302					2529697
157
158							#
159							# UTF-8 case conversion
160							#
161							my %lc = ();
162							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
163							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
164							my %uc = ();
165							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
166							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
167							my %fc = ();
168							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
169							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
170
171							if (0) {
172							}
173
174							elsif (__PACKAGE__ =~ / \b Eutf2 \z/oxms) {
175							%range_tr = (
176							1 => [ [0x00..0x7F],
177							[0xF5..0xFF], # malformed octet
178							],
179							2 => [ [0xC2..0xDF],[0x80..0xBF],
180							],
181							3 => [ [0xE0..0xE0],[0xA0..0xBF],[0x80..0xBF],
182							[0xE1..0xEC],[0x80..0xBF],[0x80..0xBF],
183							[0xED..0xED],[0x80..0x9F],[0x80..0xBF],
184							[0xEE..0xEF],[0x80..0xBF],[0x80..0xBF],
185							],
186							4 => [ [0xF0..0xF0],[0x90..0xBF],[0x80..0xBF],[0x80..0xBF],
187							[0xF1..0xF3],[0x80..0xBF],[0x80..0xBF],[0x80..0xBF],
188							[0xF4..0xF4],[0x80..0x8F],[0x80..0xBF],[0x80..0xBF],
189							],
190							);
191							$encoding_alias = qr/ \b (?: utf-8 \| utf-8-strict \| utf-?2 ) \b /oxmsi;
192
193							# CaseFolding-9.0.0.txt
194							# Date: 2016-03-02, 18:54:54 GMT
195							# c 2016 UnicodeR, Inc.
196							# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
197							# For terms of use, see http://www.unicode.org/terms_of_use.html
198							#
199							# Unicode Character Database
200							# For documentation, see http://www.unicode.org/reports/tr44/
201
202							# you can use "make_CaseFolding.pl" to update this hash
203
204							%fc = (
205							"\x41" => "\x61", # LATIN CAPITAL LETTER A
206							"\x42" => "\x62", # LATIN CAPITAL LETTER B
207							"\x43" => "\x63", # LATIN CAPITAL LETTER C
208							"\x44" => "\x64", # LATIN CAPITAL LETTER D
209							"\x45" => "\x65", # LATIN CAPITAL LETTER E
210							"\x46" => "\x66", # LATIN CAPITAL LETTER F
211							"\x47" => "\x67", # LATIN CAPITAL LETTER G
212							"\x48" => "\x68", # LATIN CAPITAL LETTER H
213							"\x49" => "\x69", # LATIN CAPITAL LETTER I
214							"\x4A" => "\x6A", # LATIN CAPITAL LETTER J
215							"\x4B" => "\x6B", # LATIN CAPITAL LETTER K
216							"\x4C" => "\x6C", # LATIN CAPITAL LETTER L
217							"\x4D" => "\x6D", # LATIN CAPITAL LETTER M
218							"\x4E" => "\x6E", # LATIN CAPITAL LETTER N
219							"\x4F" => "\x6F", # LATIN CAPITAL LETTER O
220							"\x50" => "\x70", # LATIN CAPITAL LETTER P
221							"\x51" => "\x71", # LATIN CAPITAL LETTER Q
222							"\x52" => "\x72", # LATIN CAPITAL LETTER R
223							"\x53" => "\x73", # LATIN CAPITAL LETTER S
224							"\x54" => "\x74", # LATIN CAPITAL LETTER T
225							"\x55" => "\x75", # LATIN CAPITAL LETTER U
226							"\x56" => "\x76", # LATIN CAPITAL LETTER V
227							"\x57" => "\x77", # LATIN CAPITAL LETTER W
228							"\x58" => "\x78", # LATIN CAPITAL LETTER X
229							"\x59" => "\x79", # LATIN CAPITAL LETTER Y
230							"\x5A" => "\x7A", # LATIN CAPITAL LETTER Z
231							"\xC2\xB5" => "\xCE\xBC", # MICRO SIGN
232							"\xC3\x80" => "\xC3\xA0", # LATIN CAPITAL LETTER A WITH GRAVE
233							"\xC3\x81" => "\xC3\xA1", # LATIN CAPITAL LETTER A WITH ACUTE
234							"\xC3\x82" => "\xC3\xA2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
235							"\xC3\x83" => "\xC3\xA3", # LATIN CAPITAL LETTER A WITH TILDE
236							"\xC3\x84" => "\xC3\xA4", # LATIN CAPITAL LETTER A WITH DIAERESIS
237							"\xC3\x85" => "\xC3\xA5", # LATIN CAPITAL LETTER A WITH RING ABOVE
238							"\xC3\x86" => "\xC3\xA6", # LATIN CAPITAL LETTER AE
239							"\xC3\x87" => "\xC3\xA7", # LATIN CAPITAL LETTER C WITH CEDILLA
240							"\xC3\x88" => "\xC3\xA8", # LATIN CAPITAL LETTER E WITH GRAVE
241							"\xC3\x89" => "\xC3\xA9", # LATIN CAPITAL LETTER E WITH ACUTE
242							"\xC3\x8A" => "\xC3\xAA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
243							"\xC3\x8B" => "\xC3\xAB", # LATIN CAPITAL LETTER E WITH DIAERESIS
244							"\xC3\x8C" => "\xC3\xAC", # LATIN CAPITAL LETTER I WITH GRAVE
245							"\xC3\x8D" => "\xC3\xAD", # LATIN CAPITAL LETTER I WITH ACUTE
246							"\xC3\x8E" => "\xC3\xAE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
247							"\xC3\x8F" => "\xC3\xAF", # LATIN CAPITAL LETTER I WITH DIAERESIS
248							"\xC3\x90" => "\xC3\xB0", # LATIN CAPITAL LETTER ETH
249							"\xC3\x91" => "\xC3\xB1", # LATIN CAPITAL LETTER N WITH TILDE
250							"\xC3\x92" => "\xC3\xB2", # LATIN CAPITAL LETTER O WITH GRAVE
251							"\xC3\x93" => "\xC3\xB3", # LATIN CAPITAL LETTER O WITH ACUTE
252							"\xC3\x94" => "\xC3\xB4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
253							"\xC3\x95" => "\xC3\xB5", # LATIN CAPITAL LETTER O WITH TILDE
254							"\xC3\x96" => "\xC3\xB6", # LATIN CAPITAL LETTER O WITH DIAERESIS
255							"\xC3\x98" => "\xC3\xB8", # LATIN CAPITAL LETTER O WITH STROKE
256							"\xC3\x99" => "\xC3\xB9", # LATIN CAPITAL LETTER U WITH GRAVE
257							"\xC3\x9A" => "\xC3\xBA", # LATIN CAPITAL LETTER U WITH ACUTE
258							"\xC3\x9B" => "\xC3\xBB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
259							"\xC3\x9C" => "\xC3\xBC", # LATIN CAPITAL LETTER U WITH DIAERESIS
260							"\xC3\x9D" => "\xC3\xBD", # LATIN CAPITAL LETTER Y WITH ACUTE
261							"\xC3\x9E" => "\xC3\xBE", # LATIN CAPITAL LETTER THORN
262							"\xC3\x9F" => "\x73\x73", # LATIN SMALL LETTER SHARP S
263							"\xC4\x80" => "\xC4\x81", # LATIN CAPITAL LETTER A WITH MACRON
264							"\xC4\x82" => "\xC4\x83", # LATIN CAPITAL LETTER A WITH BREVE
265							"\xC4\x84" => "\xC4\x85", # LATIN CAPITAL LETTER A WITH OGONEK
266							"\xC4\x86" => "\xC4\x87", # LATIN CAPITAL LETTER C WITH ACUTE
267							"\xC4\x88" => "\xC4\x89", # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
268							"\xC4\x8A" => "\xC4\x8B", # LATIN CAPITAL LETTER C WITH DOT ABOVE
269							"\xC4\x8C" => "\xC4\x8D", # LATIN CAPITAL LETTER C WITH CARON
270							"\xC4\x8E" => "\xC4\x8F", # LATIN CAPITAL LETTER D WITH CARON
271							"\xC4\x90" => "\xC4\x91", # LATIN CAPITAL LETTER D WITH STROKE
272							"\xC4\x92" => "\xC4\x93", # LATIN CAPITAL LETTER E WITH MACRON
273							"\xC4\x94" => "\xC4\x95", # LATIN CAPITAL LETTER E WITH BREVE
274							"\xC4\x96" => "\xC4\x97", # LATIN CAPITAL LETTER E WITH DOT ABOVE
275							"\xC4\x98" => "\xC4\x99", # LATIN CAPITAL LETTER E WITH OGONEK
276							"\xC4\x9A" => "\xC4\x9B", # LATIN CAPITAL LETTER E WITH CARON
277							"\xC4\x9C" => "\xC4\x9D", # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
278							"\xC4\x9E" => "\xC4\x9F", # LATIN CAPITAL LETTER G WITH BREVE
279							"\xC4\xA0" => "\xC4\xA1", # LATIN CAPITAL LETTER G WITH DOT ABOVE
280							"\xC4\xA2" => "\xC4\xA3", # LATIN CAPITAL LETTER G WITH CEDILLA
281							"\xC4\xA4" => "\xC4\xA5", # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
282							"\xC4\xA6" => "\xC4\xA7", # LATIN CAPITAL LETTER H WITH STROKE
283							"\xC4\xA8" => "\xC4\xA9", # LATIN CAPITAL LETTER I WITH TILDE
284							"\xC4\xAA" => "\xC4\xAB", # LATIN CAPITAL LETTER I WITH MACRON
285							"\xC4\xAC" => "\xC4\xAD", # LATIN CAPITAL LETTER I WITH BREVE
286							"\xC4\xAE" => "\xC4\xAF", # LATIN CAPITAL LETTER I WITH OGONEK
287							"\xC4\xB0" => "\x69\xCC\x87", # LATIN CAPITAL LETTER I WITH DOT ABOVE
288							"\xC4\xB2" => "\xC4\xB3", # LATIN CAPITAL LIGATURE IJ
289							"\xC4\xB4" => "\xC4\xB5", # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
290							"\xC4\xB6" => "\xC4\xB7", # LATIN CAPITAL LETTER K WITH CEDILLA
291							"\xC4\xB9" => "\xC4\xBA", # LATIN CAPITAL LETTER L WITH ACUTE
292							"\xC4\xBB" => "\xC4\xBC", # LATIN CAPITAL LETTER L WITH CEDILLA
293							"\xC4\xBD" => "\xC4\xBE", # LATIN CAPITAL LETTER L WITH CARON
294							"\xC4\xBF" => "\xC5\x80", # LATIN CAPITAL LETTER L WITH MIDDLE DOT
295							"\xC5\x81" => "\xC5\x82", # LATIN CAPITAL LETTER L WITH STROKE
296							"\xC5\x83" => "\xC5\x84", # LATIN CAPITAL LETTER N WITH ACUTE
297							"\xC5\x85" => "\xC5\x86", # LATIN CAPITAL LETTER N WITH CEDILLA
298							"\xC5\x87" => "\xC5\x88", # LATIN CAPITAL LETTER N WITH CARON
299							"\xC5\x89" => "\xCA\xBC\x6E", # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
300							"\xC5\x8A" => "\xC5\x8B", # LATIN CAPITAL LETTER ENG
301							"\xC5\x8C" => "\xC5\x8D", # LATIN CAPITAL LETTER O WITH MACRON
302							"\xC5\x8E" => "\xC5\x8F", # LATIN CAPITAL LETTER O WITH BREVE
303							"\xC5\x90" => "\xC5\x91", # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
304							"\xC5\x92" => "\xC5\x93", # LATIN CAPITAL LIGATURE OE
305							"\xC5\x94" => "\xC5\x95", # LATIN CAPITAL LETTER R WITH ACUTE
306							"\xC5\x96" => "\xC5\x97", # LATIN CAPITAL LETTER R WITH CEDILLA
307							"\xC5\x98" => "\xC5\x99", # LATIN CAPITAL LETTER R WITH CARON
308							"\xC5\x9A" => "\xC5\x9B", # LATIN CAPITAL LETTER S WITH ACUTE
309							"\xC5\x9C" => "\xC5\x9D", # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
310							"\xC5\x9E" => "\xC5\x9F", # LATIN CAPITAL LETTER S WITH CEDILLA
311							"\xC5\xA0" => "\xC5\xA1", # LATIN CAPITAL LETTER S WITH CARON
312							"\xC5\xA2" => "\xC5\xA3", # LATIN CAPITAL LETTER T WITH CEDILLA
313							"\xC5\xA4" => "\xC5\xA5", # LATIN CAPITAL LETTER T WITH CARON
314							"\xC5\xA6" => "\xC5\xA7", # LATIN CAPITAL LETTER T WITH STROKE
315							"\xC5\xA8" => "\xC5\xA9", # LATIN CAPITAL LETTER U WITH TILDE
316							"\xC5\xAA" => "\xC5\xAB", # LATIN CAPITAL LETTER U WITH MACRON
317							"\xC5\xAC" => "\xC5\xAD", # LATIN CAPITAL LETTER U WITH BREVE
318							"\xC5\xAE" => "\xC5\xAF", # LATIN CAPITAL LETTER U WITH RING ABOVE
319							"\xC5\xB0" => "\xC5\xB1", # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
320							"\xC5\xB2" => "\xC5\xB3", # LATIN CAPITAL LETTER U WITH OGONEK
321							"\xC5\xB4" => "\xC5\xB5", # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
322							"\xC5\xB6" => "\xC5\xB7", # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
323							"\xC5\xB8" => "\xC3\xBF", # LATIN CAPITAL LETTER Y WITH DIAERESIS
324							"\xC5\xB9" => "\xC5\xBA", # LATIN CAPITAL LETTER Z WITH ACUTE
325							"\xC5\xBB" => "\xC5\xBC", # LATIN CAPITAL LETTER Z WITH DOT ABOVE
326							"\xC5\xBD" => "\xC5\xBE", # LATIN CAPITAL LETTER Z WITH CARON
327							"\xC5\xBF" => "\x73", # LATIN SMALL LETTER LONG S
328							"\xC6\x81" => "\xC9\x93", # LATIN CAPITAL LETTER B WITH HOOK
329							"\xC6\x82" => "\xC6\x83", # LATIN CAPITAL LETTER B WITH TOPBAR
330							"\xC6\x84" => "\xC6\x85", # LATIN CAPITAL LETTER TONE SIX
331							"\xC6\x86" => "\xC9\x94", # LATIN CAPITAL LETTER OPEN O
332							"\xC6\x87" => "\xC6\x88", # LATIN CAPITAL LETTER C WITH HOOK
333							"\xC6\x89" => "\xC9\x96", # LATIN CAPITAL LETTER AFRICAN D
334							"\xC6\x8A" => "\xC9\x97", # LATIN CAPITAL LETTER D WITH HOOK
335							"\xC6\x8B" => "\xC6\x8C", # LATIN CAPITAL LETTER D WITH TOPBAR
336							"\xC6\x8E" => "\xC7\x9D", # LATIN CAPITAL LETTER REVERSED E
337							"\xC6\x8F" => "\xC9\x99", # LATIN CAPITAL LETTER SCHWA
338							"\xC6\x90" => "\xC9\x9B", # LATIN CAPITAL LETTER OPEN E
339							"\xC6\x91" => "\xC6\x92", # LATIN CAPITAL LETTER F WITH HOOK
340							"\xC6\x93" => "\xC9\xA0", # LATIN CAPITAL LETTER G WITH HOOK
341							"\xC6\x94" => "\xC9\xA3", # LATIN CAPITAL LETTER GAMMA
342							"\xC6\x96" => "\xC9\xA9", # LATIN CAPITAL LETTER IOTA
343							"\xC6\x97" => "\xC9\xA8", # LATIN CAPITAL LETTER I WITH STROKE
344							"\xC6\x98" => "\xC6\x99", # LATIN CAPITAL LETTER K WITH HOOK
345							"\xC6\x9C" => "\xC9\xAF", # LATIN CAPITAL LETTER TURNED M
346							"\xC6\x9D" => "\xC9\xB2", # LATIN CAPITAL LETTER N WITH LEFT HOOK
347							"\xC6\x9F" => "\xC9\xB5", # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
348							"\xC6\xA0" => "\xC6\xA1", # LATIN CAPITAL LETTER O WITH HORN
349							"\xC6\xA2" => "\xC6\xA3", # LATIN CAPITAL LETTER OI
350							"\xC6\xA4" => "\xC6\xA5", # LATIN CAPITAL LETTER P WITH HOOK
351							"\xC6\xA6" => "\xCA\x80", # LATIN LETTER YR
352							"\xC6\xA7" => "\xC6\xA8", # LATIN CAPITAL LETTER TONE TWO
353							"\xC6\xA9" => "\xCA\x83", # LATIN CAPITAL LETTER ESH
354							"\xC6\xAC" => "\xC6\xAD", # LATIN CAPITAL LETTER T WITH HOOK
355							"\xC6\xAE" => "\xCA\x88", # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
356							"\xC6\xAF" => "\xC6\xB0", # LATIN CAPITAL LETTER U WITH HORN
357							"\xC6\xB1" => "\xCA\x8A", # LATIN CAPITAL LETTER UPSILON
358							"\xC6\xB2" => "\xCA\x8B", # LATIN CAPITAL LETTER V WITH HOOK
359							"\xC6\xB3" => "\xC6\xB4", # LATIN CAPITAL LETTER Y WITH HOOK
360							"\xC6\xB5" => "\xC6\xB6", # LATIN CAPITAL LETTER Z WITH STROKE
361							"\xC6\xB7" => "\xCA\x92", # LATIN CAPITAL LETTER EZH
362							"\xC6\xB8" => "\xC6\xB9", # LATIN CAPITAL LETTER EZH REVERSED
363							"\xC6\xBC" => "\xC6\xBD", # LATIN CAPITAL LETTER TONE FIVE
364							"\xC7\x84" => "\xC7\x86", # LATIN CAPITAL LETTER DZ WITH CARON
365							"\xC7\x85" => "\xC7\x86", # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
366							"\xC7\x87" => "\xC7\x89", # LATIN CAPITAL LETTER LJ
367							"\xC7\x88" => "\xC7\x89", # LATIN CAPITAL LETTER L WITH SMALL LETTER J
368							"\xC7\x8A" => "\xC7\x8C", # LATIN CAPITAL LETTER NJ
369							"\xC7\x8B" => "\xC7\x8C", # LATIN CAPITAL LETTER N WITH SMALL LETTER J
370							"\xC7\x8D" => "\xC7\x8E", # LATIN CAPITAL LETTER A WITH CARON
371							"\xC7\x8F" => "\xC7\x90", # LATIN CAPITAL LETTER I WITH CARON
372							"\xC7\x91" => "\xC7\x92", # LATIN CAPITAL LETTER O WITH CARON
373							"\xC7\x93" => "\xC7\x94", # LATIN CAPITAL LETTER U WITH CARON
374							"\xC7\x95" => "\xC7\x96", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
375							"\xC7\x97" => "\xC7\x98", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
376							"\xC7\x99" => "\xC7\x9A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
377							"\xC7\x9B" => "\xC7\x9C", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
378							"\xC7\x9E" => "\xC7\x9F", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
379							"\xC7\xA0" => "\xC7\xA1", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
380							"\xC7\xA2" => "\xC7\xA3", # LATIN CAPITAL LETTER AE WITH MACRON
381							"\xC7\xA4" => "\xC7\xA5", # LATIN CAPITAL LETTER G WITH STROKE
382							"\xC7\xA6" => "\xC7\xA7", # LATIN CAPITAL LETTER G WITH CARON
383							"\xC7\xA8" => "\xC7\xA9", # LATIN CAPITAL LETTER K WITH CARON
384							"\xC7\xAA" => "\xC7\xAB", # LATIN CAPITAL LETTER O WITH OGONEK
385							"\xC7\xAC" => "\xC7\xAD", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
386							"\xC7\xAE" => "\xC7\xAF", # LATIN CAPITAL LETTER EZH WITH CARON
387							"\xC7\xB0" => "\x6A\xCC\x8C", # LATIN SMALL LETTER J WITH CARON
388							"\xC7\xB1" => "\xC7\xB3", # LATIN CAPITAL LETTER DZ
389							"\xC7\xB2" => "\xC7\xB3", # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
390							"\xC7\xB4" => "\xC7\xB5", # LATIN CAPITAL LETTER G WITH ACUTE
391							"\xC7\xB6" => "\xC6\x95", # LATIN CAPITAL LETTER HWAIR
392							"\xC7\xB7" => "\xC6\xBF", # LATIN CAPITAL LETTER WYNN
393							"\xC7\xB8" => "\xC7\xB9", # LATIN CAPITAL LETTER N WITH GRAVE
394							"\xC7\xBA" => "\xC7\xBB", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
395							"\xC7\xBC" => "\xC7\xBD", # LATIN CAPITAL LETTER AE WITH ACUTE
396							"\xC7\xBE" => "\xC7\xBF", # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
397							"\xC8\x80" => "\xC8\x81", # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
398							"\xC8\x82" => "\xC8\x83", # LATIN CAPITAL LETTER A WITH INVERTED BREVE
399							"\xC8\x84" => "\xC8\x85", # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
400							"\xC8\x86" => "\xC8\x87", # LATIN CAPITAL LETTER E WITH INVERTED BREVE
401							"\xC8\x88" => "\xC8\x89", # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
402							"\xC8\x8A" => "\xC8\x8B", # LATIN CAPITAL LETTER I WITH INVERTED BREVE
403							"\xC8\x8C" => "\xC8\x8D", # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
404							"\xC8\x8E" => "\xC8\x8F", # LATIN CAPITAL LETTER O WITH INVERTED BREVE
405							"\xC8\x90" => "\xC8\x91", # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
406							"\xC8\x92" => "\xC8\x93", # LATIN CAPITAL LETTER R WITH INVERTED BREVE
407							"\xC8\x94" => "\xC8\x95", # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
408							"\xC8\x96" => "\xC8\x97", # LATIN CAPITAL LETTER U WITH INVERTED BREVE
409							"\xC8\x98" => "\xC8\x99", # LATIN CAPITAL LETTER S WITH COMMA BELOW
410							"\xC8\x9A" => "\xC8\x9B", # LATIN CAPITAL LETTER T WITH COMMA BELOW
411							"\xC8\x9C" => "\xC8\x9D", # LATIN CAPITAL LETTER YOGH
412							"\xC8\x9E" => "\xC8\x9F", # LATIN CAPITAL LETTER H WITH CARON
413							"\xC8\xA0" => "\xC6\x9E", # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
414							"\xC8\xA2" => "\xC8\xA3", # LATIN CAPITAL LETTER OU
415							"\xC8\xA4" => "\xC8\xA5", # LATIN CAPITAL LETTER Z WITH HOOK
416							"\xC8\xA6" => "\xC8\xA7", # LATIN CAPITAL LETTER A WITH DOT ABOVE
417							"\xC8\xA8" => "\xC8\xA9", # LATIN CAPITAL LETTER E WITH CEDILLA
418							"\xC8\xAA" => "\xC8\xAB", # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
419							"\xC8\xAC" => "\xC8\xAD", # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
420							"\xC8\xAE" => "\xC8\xAF", # LATIN CAPITAL LETTER O WITH DOT ABOVE
421							"\xC8\xB0" => "\xC8\xB1", # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
422							"\xC8\xB2" => "\xC8\xB3", # LATIN CAPITAL LETTER Y WITH MACRON
423							"\xC8\xBA" => "\xE2\xB1\xA5", # LATIN CAPITAL LETTER A WITH STROKE
424							"\xC8\xBB" => "\xC8\xBC", # LATIN CAPITAL LETTER C WITH STROKE
425							"\xC8\xBD" => "\xC6\x9A", # LATIN CAPITAL LETTER L WITH BAR
426							"\xC8\xBE" => "\xE2\xB1\xA6", # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
427							"\xC9\x81" => "\xC9\x82", # LATIN CAPITAL LETTER GLOTTAL STOP
428							"\xC9\x83" => "\xC6\x80", # LATIN CAPITAL LETTER B WITH STROKE
429							"\xC9\x84" => "\xCA\x89", # LATIN CAPITAL LETTER U BAR
430							"\xC9\x85" => "\xCA\x8C", # LATIN CAPITAL LETTER TURNED V
431							"\xC9\x86" => "\xC9\x87", # LATIN CAPITAL LETTER E WITH STROKE
432							"\xC9\x88" => "\xC9\x89", # LATIN CAPITAL LETTER J WITH STROKE
433							"\xC9\x8A" => "\xC9\x8B", # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
434							"\xC9\x8C" => "\xC9\x8D", # LATIN CAPITAL LETTER R WITH STROKE
435							"\xC9\x8E" => "\xC9\x8F", # LATIN CAPITAL LETTER Y WITH STROKE
436							"\xCD\x85" => "\xCE\xB9", # COMBINING GREEK YPOGEGRAMMENI
437							"\xCD\xB0" => "\xCD\xB1", # GREEK CAPITAL LETTER HETA
438							"\xCD\xB2" => "\xCD\xB3", # GREEK CAPITAL LETTER ARCHAIC SAMPI
439							"\xCD\xB6" => "\xCD\xB7", # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
440							"\xCD\xBF" => "\xCF\xB3", # GREEK CAPITAL LETTER YOT
441							"\xCE\x86" => "\xCE\xAC", # GREEK CAPITAL LETTER ALPHA WITH TONOS
442							"\xCE\x88" => "\xCE\xAD", # GREEK CAPITAL LETTER EPSILON WITH TONOS
443							"\xCE\x89" => "\xCE\xAE", # GREEK CAPITAL LETTER ETA WITH TONOS
444							"\xCE\x8A" => "\xCE\xAF", # GREEK CAPITAL LETTER IOTA WITH TONOS
445							"\xCE\x8C" => "\xCF\x8C", # GREEK CAPITAL LETTER OMICRON WITH TONOS
446							"\xCE\x8E" => "\xCF\x8D", # GREEK CAPITAL LETTER UPSILON WITH TONOS
447							"\xCE\x8F" => "\xCF\x8E", # GREEK CAPITAL LETTER OMEGA WITH TONOS
448							"\xCE\x90" => "\xCE\xB9\xCC\x88\xCC\x81", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
449							"\xCE\x91" => "\xCE\xB1", # GREEK CAPITAL LETTER ALPHA
450							"\xCE\x92" => "\xCE\xB2", # GREEK CAPITAL LETTER BETA
451							"\xCE\x93" => "\xCE\xB3", # GREEK CAPITAL LETTER GAMMA
452							"\xCE\x94" => "\xCE\xB4", # GREEK CAPITAL LETTER DELTA
453							"\xCE\x95" => "\xCE\xB5", # GREEK CAPITAL LETTER EPSILON
454							"\xCE\x96" => "\xCE\xB6", # GREEK CAPITAL LETTER ZETA
455							"\xCE\x97" => "\xCE\xB7", # GREEK CAPITAL LETTER ETA
456							"\xCE\x98" => "\xCE\xB8", # GREEK CAPITAL LETTER THETA
457							"\xCE\x99" => "\xCE\xB9", # GREEK CAPITAL LETTER IOTA
458							"\xCE\x9A" => "\xCE\xBA", # GREEK CAPITAL LETTER KAPPA
459							"\xCE\x9B" => "\xCE\xBB", # GREEK CAPITAL LETTER LAMDA
460							"\xCE\x9C" => "\xCE\xBC", # GREEK CAPITAL LETTER MU
461							"\xCE\x9D" => "\xCE\xBD", # GREEK CAPITAL LETTER NU
462							"\xCE\x9E" => "\xCE\xBE", # GREEK CAPITAL LETTER XI
463							"\xCE\x9F" => "\xCE\xBF", # GREEK CAPITAL LETTER OMICRON
464							"\xCE\xA0" => "\xCF\x80", # GREEK CAPITAL LETTER PI
465							"\xCE\xA1" => "\xCF\x81", # GREEK CAPITAL LETTER RHO
466							"\xCE\xA3" => "\xCF\x83", # GREEK CAPITAL LETTER SIGMA
467							"\xCE\xA4" => "\xCF\x84", # GREEK CAPITAL LETTER TAU
468							"\xCE\xA5" => "\xCF\x85", # GREEK CAPITAL LETTER UPSILON
469							"\xCE\xA6" => "\xCF\x86", # GREEK CAPITAL LETTER PHI
470							"\xCE\xA7" => "\xCF\x87", # GREEK CAPITAL LETTER CHI
471							"\xCE\xA8" => "\xCF\x88", # GREEK CAPITAL LETTER PSI
472							"\xCE\xA9" => "\xCF\x89", # GREEK CAPITAL LETTER OMEGA
473							"\xCE\xAA" => "\xCF\x8A", # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
474							"\xCE\xAB" => "\xCF\x8B", # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
475							"\xCE\xB0" => "\xCF\x85\xCC\x88\xCC\x81", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
476							"\xCF\x82" => "\xCF\x83", # GREEK SMALL LETTER FINAL SIGMA
477							"\xCF\x8F" => "\xCF\x97", # GREEK CAPITAL KAI SYMBOL
478							"\xCF\x90" => "\xCE\xB2", # GREEK BETA SYMBOL
479							"\xCF\x91" => "\xCE\xB8", # GREEK THETA SYMBOL
480							"\xCF\x95" => "\xCF\x86", # GREEK PHI SYMBOL
481							"\xCF\x96" => "\xCF\x80", # GREEK PI SYMBOL
482							"\xCF\x98" => "\xCF\x99", # GREEK LETTER ARCHAIC KOPPA
483							"\xCF\x9A" => "\xCF\x9B", # GREEK LETTER STIGMA
484							"\xCF\x9C" => "\xCF\x9D", # GREEK LETTER DIGAMMA
485							"\xCF\x9E" => "\xCF\x9F", # GREEK LETTER KOPPA
486							"\xCF\xA0" => "\xCF\xA1", # GREEK LETTER SAMPI
487							"\xCF\xA2" => "\xCF\xA3", # COPTIC CAPITAL LETTER SHEI
488							"\xCF\xA4" => "\xCF\xA5", # COPTIC CAPITAL LETTER FEI
489							"\xCF\xA6" => "\xCF\xA7", # COPTIC CAPITAL LETTER KHEI
490							"\xCF\xA8" => "\xCF\xA9", # COPTIC CAPITAL LETTER HORI
491							"\xCF\xAA" => "\xCF\xAB", # COPTIC CAPITAL LETTER GANGIA
492							"\xCF\xAC" => "\xCF\xAD", # COPTIC CAPITAL LETTER SHIMA
493							"\xCF\xAE" => "\xCF\xAF", # COPTIC CAPITAL LETTER DEI
494							"\xCF\xB0" => "\xCE\xBA", # GREEK KAPPA SYMBOL
495							"\xCF\xB1" => "\xCF\x81", # GREEK RHO SYMBOL
496							"\xCF\xB4" => "\xCE\xB8", # GREEK CAPITAL THETA SYMBOL
497							"\xCF\xB5" => "\xCE\xB5", # GREEK LUNATE EPSILON SYMBOL
498							"\xCF\xB7" => "\xCF\xB8", # GREEK CAPITAL LETTER SHO
499							"\xCF\xB9" => "\xCF\xB2", # GREEK CAPITAL LUNATE SIGMA SYMBOL
500							"\xCF\xBA" => "\xCF\xBB", # GREEK CAPITAL LETTER SAN
501							"\xCF\xBD" => "\xCD\xBB", # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
502							"\xCF\xBE" => "\xCD\xBC", # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
503							"\xCF\xBF" => "\xCD\xBD", # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
504							"\xD0\x80" => "\xD1\x90", # CYRILLIC CAPITAL LETTER IE WITH GRAVE
505							"\xD0\x81" => "\xD1\x91", # CYRILLIC CAPITAL LETTER IO
506							"\xD0\x82" => "\xD1\x92", # CYRILLIC CAPITAL LETTER DJE
507							"\xD0\x83" => "\xD1\x93", # CYRILLIC CAPITAL LETTER GJE
508							"\xD0\x84" => "\xD1\x94", # CYRILLIC CAPITAL LETTER UKRAINIAN IE
509							"\xD0\x85" => "\xD1\x95", # CYRILLIC CAPITAL LETTER DZE
510							"\xD0\x86" => "\xD1\x96", # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
511							"\xD0\x87" => "\xD1\x97", # CYRILLIC CAPITAL LETTER YI
512							"\xD0\x88" => "\xD1\x98", # CYRILLIC CAPITAL LETTER JE
513							"\xD0\x89" => "\xD1\x99", # CYRILLIC CAPITAL LETTER LJE
514							"\xD0\x8A" => "\xD1\x9A", # CYRILLIC CAPITAL LETTER NJE
515							"\xD0\x8B" => "\xD1\x9B", # CYRILLIC CAPITAL LETTER TSHE
516							"\xD0\x8C" => "\xD1\x9C", # CYRILLIC CAPITAL LETTER KJE
517							"\xD0\x8D" => "\xD1\x9D", # CYRILLIC CAPITAL LETTER I WITH GRAVE
518							"\xD0\x8E" => "\xD1\x9E", # CYRILLIC CAPITAL LETTER SHORT U
519							"\xD0\x8F" => "\xD1\x9F", # CYRILLIC CAPITAL LETTER DZHE
520							"\xD0\x90" => "\xD0\xB0", # CYRILLIC CAPITAL LETTER A
521							"\xD0\x91" => "\xD0\xB1", # CYRILLIC CAPITAL LETTER BE
522							"\xD0\x92" => "\xD0\xB2", # CYRILLIC CAPITAL LETTER VE
523							"\xD0\x93" => "\xD0\xB3", # CYRILLIC CAPITAL LETTER GHE
524							"\xD0\x94" => "\xD0\xB4", # CYRILLIC CAPITAL LETTER DE
525							"\xD0\x95" => "\xD0\xB5", # CYRILLIC CAPITAL LETTER IE
526							"\xD0\x96" => "\xD0\xB6", # CYRILLIC CAPITAL LETTER ZHE
527							"\xD0\x97" => "\xD0\xB7", # CYRILLIC CAPITAL LETTER ZE
528							"\xD0\x98" => "\xD0\xB8", # CYRILLIC CAPITAL LETTER I
529							"\xD0\x99" => "\xD0\xB9", # CYRILLIC CAPITAL LETTER SHORT I
530							"\xD0\x9A" => "\xD0\xBA", # CYRILLIC CAPITAL LETTER KA
531							"\xD0\x9B" => "\xD0\xBB", # CYRILLIC CAPITAL LETTER EL
532							"\xD0\x9C" => "\xD0\xBC", # CYRILLIC CAPITAL LETTER EM
533							"\xD0\x9D" => "\xD0\xBD", # CYRILLIC CAPITAL LETTER EN
534							"\xD0\x9E" => "\xD0\xBE", # CYRILLIC CAPITAL LETTER O
535							"\xD0\x9F" => "\xD0\xBF", # CYRILLIC CAPITAL LETTER PE
536							"\xD0\xA0" => "\xD1\x80", # CYRILLIC CAPITAL LETTER ER
537							"\xD0\xA1" => "\xD1\x81", # CYRILLIC CAPITAL LETTER ES
538							"\xD0\xA2" => "\xD1\x82", # CYRILLIC CAPITAL LETTER TE
539							"\xD0\xA3" => "\xD1\x83", # CYRILLIC CAPITAL LETTER U
540							"\xD0\xA4" => "\xD1\x84", # CYRILLIC CAPITAL LETTER EF
541							"\xD0\xA5" => "\xD1\x85", # CYRILLIC CAPITAL LETTER HA
542							"\xD0\xA6" => "\xD1\x86", # CYRILLIC CAPITAL LETTER TSE
543							"\xD0\xA7" => "\xD1\x87", # CYRILLIC CAPITAL LETTER CHE
544							"\xD0\xA8" => "\xD1\x88", # CYRILLIC CAPITAL LETTER SHA
545							"\xD0\xA9" => "\xD1\x89", # CYRILLIC CAPITAL LETTER SHCHA
546							"\xD0\xAA" => "\xD1\x8A", # CYRILLIC CAPITAL LETTER HARD SIGN
547							"\xD0\xAB" => "\xD1\x8B", # CYRILLIC CAPITAL LETTER YERU
548							"\xD0\xAC" => "\xD1\x8C", # CYRILLIC CAPITAL LETTER SOFT SIGN
549							"\xD0\xAD" => "\xD1\x8D", # CYRILLIC CAPITAL LETTER E
550							"\xD0\xAE" => "\xD1\x8E", # CYRILLIC CAPITAL LETTER YU
551							"\xD0\xAF" => "\xD1\x8F", # CYRILLIC CAPITAL LETTER YA
552							"\xD1\xA0" => "\xD1\xA1", # CYRILLIC CAPITAL LETTER OMEGA
553							"\xD1\xA2" => "\xD1\xA3", # CYRILLIC CAPITAL LETTER YAT
554							"\xD1\xA4" => "\xD1\xA5", # CYRILLIC CAPITAL LETTER IOTIFIED E
555							"\xD1\xA6" => "\xD1\xA7", # CYRILLIC CAPITAL LETTER LITTLE YUS
556							"\xD1\xA8" => "\xD1\xA9", # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
557							"\xD1\xAA" => "\xD1\xAB", # CYRILLIC CAPITAL LETTER BIG YUS
558							"\xD1\xAC" => "\xD1\xAD", # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
559							"\xD1\xAE" => "\xD1\xAF", # CYRILLIC CAPITAL LETTER KSI
560							"\xD1\xB0" => "\xD1\xB1", # CYRILLIC CAPITAL LETTER PSI
561							"\xD1\xB2" => "\xD1\xB3", # CYRILLIC CAPITAL LETTER FITA
562							"\xD1\xB4" => "\xD1\xB5", # CYRILLIC CAPITAL LETTER IZHITSA
563							"\xD1\xB6" => "\xD1\xB7", # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
564							"\xD1\xB8" => "\xD1\xB9", # CYRILLIC CAPITAL LETTER UK
565							"\xD1\xBA" => "\xD1\xBB", # CYRILLIC CAPITAL LETTER ROUND OMEGA
566							"\xD1\xBC" => "\xD1\xBD", # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
567							"\xD1\xBE" => "\xD1\xBF", # CYRILLIC CAPITAL LETTER OT
568							"\xD2\x80" => "\xD2\x81", # CYRILLIC CAPITAL LETTER KOPPA
569							"\xD2\x8A" => "\xD2\x8B", # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
570							"\xD2\x8C" => "\xD2\x8D", # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
571							"\xD2\x8E" => "\xD2\x8F", # CYRILLIC CAPITAL LETTER ER WITH TICK
572							"\xD2\x90" => "\xD2\x91", # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
573							"\xD2\x92" => "\xD2\x93", # CYRILLIC CAPITAL LETTER GHE WITH STROKE
574							"\xD2\x94" => "\xD2\x95", # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
575							"\xD2\x96" => "\xD2\x97", # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
576							"\xD2\x98" => "\xD2\x99", # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
577							"\xD2\x9A" => "\xD2\x9B", # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
578							"\xD2\x9C" => "\xD2\x9D", # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
579							"\xD2\x9E" => "\xD2\x9F", # CYRILLIC CAPITAL LETTER KA WITH STROKE
580							"\xD2\xA0" => "\xD2\xA1", # CYRILLIC CAPITAL LETTER BASHKIR KA
581							"\xD2\xA2" => "\xD2\xA3", # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
582							"\xD2\xA4" => "\xD2\xA5", # CYRILLIC CAPITAL LIGATURE EN GHE
583							"\xD2\xA6" => "\xD2\xA7", # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
584							"\xD2\xA8" => "\xD2\xA9", # CYRILLIC CAPITAL LETTER ABKHASIAN HA
585							"\xD2\xAA" => "\xD2\xAB", # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
586							"\xD2\xAC" => "\xD2\xAD", # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
587							"\xD2\xAE" => "\xD2\xAF", # CYRILLIC CAPITAL LETTER STRAIGHT U
588							"\xD2\xB0" => "\xD2\xB1", # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
589							"\xD2\xB2" => "\xD2\xB3", # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
590							"\xD2\xB4" => "\xD2\xB5", # CYRILLIC CAPITAL LIGATURE TE TSE
591							"\xD2\xB6" => "\xD2\xB7", # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
592							"\xD2\xB8" => "\xD2\xB9", # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
593							"\xD2\xBA" => "\xD2\xBB", # CYRILLIC CAPITAL LETTER SHHA
594							"\xD2\xBC" => "\xD2\xBD", # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
595							"\xD2\xBE" => "\xD2\xBF", # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
596							"\xD3\x80" => "\xD3\x8F", # CYRILLIC LETTER PALOCHKA
597							"\xD3\x81" => "\xD3\x82", # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
598							"\xD3\x83" => "\xD3\x84", # CYRILLIC CAPITAL LETTER KA WITH HOOK
599							"\xD3\x85" => "\xD3\x86", # CYRILLIC CAPITAL LETTER EL WITH TAIL
600							"\xD3\x87" => "\xD3\x88", # CYRILLIC CAPITAL LETTER EN WITH HOOK
601							"\xD3\x89" => "\xD3\x8A", # CYRILLIC CAPITAL LETTER EN WITH TAIL
602							"\xD3\x8B" => "\xD3\x8C", # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
603							"\xD3\x8D" => "\xD3\x8E", # CYRILLIC CAPITAL LETTER EM WITH TAIL
604							"\xD3\x90" => "\xD3\x91", # CYRILLIC CAPITAL LETTER A WITH BREVE
605							"\xD3\x92" => "\xD3\x93", # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
606							"\xD3\x94" => "\xD3\x95", # CYRILLIC CAPITAL LIGATURE A IE
607							"\xD3\x96" => "\xD3\x97", # CYRILLIC CAPITAL LETTER IE WITH BREVE
608							"\xD3\x98" => "\xD3\x99", # CYRILLIC CAPITAL LETTER SCHWA
609							"\xD3\x9A" => "\xD3\x9B", # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
610							"\xD3\x9C" => "\xD3\x9D", # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
611							"\xD3\x9E" => "\xD3\x9F", # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
612							"\xD3\xA0" => "\xD3\xA1", # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
613							"\xD3\xA2" => "\xD3\xA3", # CYRILLIC CAPITAL LETTER I WITH MACRON
614							"\xD3\xA4" => "\xD3\xA5", # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
615							"\xD3\xA6" => "\xD3\xA7", # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
616							"\xD3\xA8" => "\xD3\xA9", # CYRILLIC CAPITAL LETTER BARRED O
617							"\xD3\xAA" => "\xD3\xAB", # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
618							"\xD3\xAC" => "\xD3\xAD", # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
619							"\xD3\xAE" => "\xD3\xAF", # CYRILLIC CAPITAL LETTER U WITH MACRON
620							"\xD3\xB0" => "\xD3\xB1", # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
621							"\xD3\xB2" => "\xD3\xB3", # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
622							"\xD3\xB4" => "\xD3\xB5", # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
623							"\xD3\xB6" => "\xD3\xB7", # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
624							"\xD3\xB8" => "\xD3\xB9", # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
625							"\xD3\xBA" => "\xD3\xBB", # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
626							"\xD3\xBC" => "\xD3\xBD", # CYRILLIC CAPITAL LETTER HA WITH HOOK
627							"\xD3\xBE" => "\xD3\xBF", # CYRILLIC CAPITAL LETTER HA WITH STROKE
628							"\xD4\x80" => "\xD4\x81", # CYRILLIC CAPITAL LETTER KOMI DE
629							"\xD4\x82" => "\xD4\x83", # CYRILLIC CAPITAL LETTER KOMI DJE
630							"\xD4\x84" => "\xD4\x85", # CYRILLIC CAPITAL LETTER KOMI ZJE
631							"\xD4\x86" => "\xD4\x87", # CYRILLIC CAPITAL LETTER KOMI DZJE
632							"\xD4\x88" => "\xD4\x89", # CYRILLIC CAPITAL LETTER KOMI LJE
633							"\xD4\x8A" => "\xD4\x8B", # CYRILLIC CAPITAL LETTER KOMI NJE
634							"\xD4\x8C" => "\xD4\x8D", # CYRILLIC CAPITAL LETTER KOMI SJE
635							"\xD4\x8E" => "\xD4\x8F", # CYRILLIC CAPITAL LETTER KOMI TJE
636							"\xD4\x90" => "\xD4\x91", # CYRILLIC CAPITAL LETTER REVERSED ZE
637							"\xD4\x92" => "\xD4\x93", # CYRILLIC CAPITAL LETTER EL WITH HOOK
638							"\xD4\x94" => "\xD4\x95", # CYRILLIC CAPITAL LETTER LHA
639							"\xD4\x96" => "\xD4\x97", # CYRILLIC CAPITAL LETTER RHA
640							"\xD4\x98" => "\xD4\x99", # CYRILLIC CAPITAL LETTER YAE
641							"\xD4\x9A" => "\xD4\x9B", # CYRILLIC CAPITAL LETTER QA
642							"\xD4\x9C" => "\xD4\x9D", # CYRILLIC CAPITAL LETTER WE
643							"\xD4\x9E" => "\xD4\x9F", # CYRILLIC CAPITAL LETTER ALEUT KA
644							"\xD4\xA0" => "\xD4\xA1", # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
645							"\xD4\xA2" => "\xD4\xA3", # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
646							"\xD4\xA4" => "\xD4\xA5", # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
647							"\xD4\xA6" => "\xD4\xA7", # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
648							"\xD4\xA8" => "\xD4\xA9", # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
649							"\xD4\xAA" => "\xD4\xAB", # CYRILLIC CAPITAL LETTER DZZHE
650							"\xD4\xAC" => "\xD4\xAD", # CYRILLIC CAPITAL LETTER DCHE
651							"\xD4\xAE" => "\xD4\xAF", # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
652							"\xD4\xB1" => "\xD5\xA1", # ARMENIAN CAPITAL LETTER AYB
653							"\xD4\xB2" => "\xD5\xA2", # ARMENIAN CAPITAL LETTER BEN
654							"\xD4\xB3" => "\xD5\xA3", # ARMENIAN CAPITAL LETTER GIM
655							"\xD4\xB4" => "\xD5\xA4", # ARMENIAN CAPITAL LETTER DA
656							"\xD4\xB5" => "\xD5\xA5", # ARMENIAN CAPITAL LETTER ECH
657							"\xD4\xB6" => "\xD5\xA6", # ARMENIAN CAPITAL LETTER ZA
658							"\xD4\xB7" => "\xD5\xA7", # ARMENIAN CAPITAL LETTER EH
659							"\xD4\xB8" => "\xD5\xA8", # ARMENIAN CAPITAL LETTER ET
660							"\xD4\xB9" => "\xD5\xA9", # ARMENIAN CAPITAL LETTER TO
661							"\xD4\xBA" => "\xD5\xAA", # ARMENIAN CAPITAL LETTER ZHE
662							"\xD4\xBB" => "\xD5\xAB", # ARMENIAN CAPITAL LETTER INI
663							"\xD4\xBC" => "\xD5\xAC", # ARMENIAN CAPITAL LETTER LIWN
664							"\xD4\xBD" => "\xD5\xAD", # ARMENIAN CAPITAL LETTER XEH
665							"\xD4\xBE" => "\xD5\xAE", # ARMENIAN CAPITAL LETTER CA
666							"\xD4\xBF" => "\xD5\xAF", # ARMENIAN CAPITAL LETTER KEN
667							"\xD5\x80" => "\xD5\xB0", # ARMENIAN CAPITAL LETTER HO
668							"\xD5\x81" => "\xD5\xB1", # ARMENIAN CAPITAL LETTER JA
669							"\xD5\x82" => "\xD5\xB2", # ARMENIAN CAPITAL LETTER GHAD
670							"\xD5\x83" => "\xD5\xB3", # ARMENIAN CAPITAL LETTER CHEH
671							"\xD5\x84" => "\xD5\xB4", # ARMENIAN CAPITAL LETTER MEN
672							"\xD5\x85" => "\xD5\xB5", # ARMENIAN CAPITAL LETTER YI
673							"\xD5\x86" => "\xD5\xB6", # ARMENIAN CAPITAL LETTER NOW
674							"\xD5\x87" => "\xD5\xB7", # ARMENIAN CAPITAL LETTER SHA
675							"\xD5\x88" => "\xD5\xB8", # ARMENIAN CAPITAL LETTER VO
676							"\xD5\x89" => "\xD5\xB9", # ARMENIAN CAPITAL LETTER CHA
677							"\xD5\x8A" => "\xD5\xBA", # ARMENIAN CAPITAL LETTER PEH
678							"\xD5\x8B" => "\xD5\xBB", # ARMENIAN CAPITAL LETTER JHEH
679							"\xD5\x8C" => "\xD5\xBC", # ARMENIAN CAPITAL LETTER RA
680							"\xD5\x8D" => "\xD5\xBD", # ARMENIAN CAPITAL LETTER SEH
681							"\xD5\x8E" => "\xD5\xBE", # ARMENIAN CAPITAL LETTER VEW
682							"\xD5\x8F" => "\xD5\xBF", # ARMENIAN CAPITAL LETTER TIWN
683							"\xD5\x90" => "\xD6\x80", # ARMENIAN CAPITAL LETTER REH
684							"\xD5\x91" => "\xD6\x81", # ARMENIAN CAPITAL LETTER CO
685							"\xD5\x92" => "\xD6\x82", # ARMENIAN CAPITAL LETTER YIWN
686							"\xD5\x93" => "\xD6\x83", # ARMENIAN CAPITAL LETTER PIWR
687							"\xD5\x94" => "\xD6\x84", # ARMENIAN CAPITAL LETTER KEH
688							"\xD5\x95" => "\xD6\x85", # ARMENIAN CAPITAL LETTER OH
689							"\xD5\x96" => "\xD6\x86", # ARMENIAN CAPITAL LETTER FEH
690							"\xD6\x87" => "\xD5\xA5\xD6\x82", # ARMENIAN SMALL LIGATURE ECH YIWN
691							"\xE1\x82\xA0" => "\xE2\xB4\x80", # GEORGIAN CAPITAL LETTER AN
692							"\xE1\x82\xA1" => "\xE2\xB4\x81", # GEORGIAN CAPITAL LETTER BAN
693							"\xE1\x82\xA2" => "\xE2\xB4\x82", # GEORGIAN CAPITAL LETTER GAN
694							"\xE1\x82\xA3" => "\xE2\xB4\x83", # GEORGIAN CAPITAL LETTER DON
695							"\xE1\x82\xA4" => "\xE2\xB4\x84", # GEORGIAN CAPITAL LETTER EN
696							"\xE1\x82\xA5" => "\xE2\xB4\x85", # GEORGIAN CAPITAL LETTER VIN
697							"\xE1\x82\xA6" => "\xE2\xB4\x86", # GEORGIAN CAPITAL LETTER ZEN
698							"\xE1\x82\xA7" => "\xE2\xB4\x87", # GEORGIAN CAPITAL LETTER TAN
699							"\xE1\x82\xA8" => "\xE2\xB4\x88", # GEORGIAN CAPITAL LETTER IN
700							"\xE1\x82\xA9" => "\xE2\xB4\x89", # GEORGIAN CAPITAL LETTER KAN
701							"\xE1\x82\xAA" => "\xE2\xB4\x8A", # GEORGIAN CAPITAL LETTER LAS
702							"\xE1\x82\xAB" => "\xE2\xB4\x8B", # GEORGIAN CAPITAL LETTER MAN
703							"\xE1\x82\xAC" => "\xE2\xB4\x8C", # GEORGIAN CAPITAL LETTER NAR
704							"\xE1\x82\xAD" => "\xE2\xB4\x8D", # GEORGIAN CAPITAL LETTER ON
705							"\xE1\x82\xAE" => "\xE2\xB4\x8E", # GEORGIAN CAPITAL LETTER PAR
706							"\xE1\x82\xAF" => "\xE2\xB4\x8F", # GEORGIAN CAPITAL LETTER ZHAR
707							"\xE1\x82\xB0" => "\xE2\xB4\x90", # GEORGIAN CAPITAL LETTER RAE
708							"\xE1\x82\xB1" => "\xE2\xB4\x91", # GEORGIAN CAPITAL LETTER SAN
709							"\xE1\x82\xB2" => "\xE2\xB4\x92", # GEORGIAN CAPITAL LETTER TAR
710							"\xE1\x82\xB3" => "\xE2\xB4\x93", # GEORGIAN CAPITAL LETTER UN
711							"\xE1\x82\xB4" => "\xE2\xB4\x94", # GEORGIAN CAPITAL LETTER PHAR
712							"\xE1\x82\xB5" => "\xE2\xB4\x95", # GEORGIAN CAPITAL LETTER KHAR
713							"\xE1\x82\xB6" => "\xE2\xB4\x96", # GEORGIAN CAPITAL LETTER GHAN
714							"\xE1\x82\xB7" => "\xE2\xB4\x97", # GEORGIAN CAPITAL LETTER QAR
715							"\xE1\x82\xB8" => "\xE2\xB4\x98", # GEORGIAN CAPITAL LETTER SHIN
716							"\xE1\x82\xB9" => "\xE2\xB4\x99", # GEORGIAN CAPITAL LETTER CHIN
717							"\xE1\x82\xBA" => "\xE2\xB4\x9A", # GEORGIAN CAPITAL LETTER CAN
718							"\xE1\x82\xBB" => "\xE2\xB4\x9B", # GEORGIAN CAPITAL LETTER JIL
719							"\xE1\x82\xBC" => "\xE2\xB4\x9C", # GEORGIAN CAPITAL LETTER CIL
720							"\xE1\x82\xBD" => "\xE2\xB4\x9D", # GEORGIAN CAPITAL LETTER CHAR
721							"\xE1\x82\xBE" => "\xE2\xB4\x9E", # GEORGIAN CAPITAL LETTER XAN
722							"\xE1\x82\xBF" => "\xE2\xB4\x9F", # GEORGIAN CAPITAL LETTER JHAN
723							"\xE1\x83\x80" => "\xE2\xB4\xA0", # GEORGIAN CAPITAL LETTER HAE
724							"\xE1\x83\x81" => "\xE2\xB4\xA1", # GEORGIAN CAPITAL LETTER HE
725							"\xE1\x83\x82" => "\xE2\xB4\xA2", # GEORGIAN CAPITAL LETTER HIE
726							"\xE1\x83\x83" => "\xE2\xB4\xA3", # GEORGIAN CAPITAL LETTER WE
727							"\xE1\x83\x84" => "\xE2\xB4\xA4", # GEORGIAN CAPITAL LETTER HAR
728							"\xE1\x83\x85" => "\xE2\xB4\xA5", # GEORGIAN CAPITAL LETTER HOE
729							"\xE1\x83\x87" => "\xE2\xB4\xA7", # GEORGIAN CAPITAL LETTER YN
730							"\xE1\x83\x8D" => "\xE2\xB4\xAD", # GEORGIAN CAPITAL LETTER AEN
731							"\xE1\x8F\xB8" => "\xE1\x8F\xB0", # CHEROKEE SMALL LETTER YE
732							"\xE1\x8F\xB9" => "\xE1\x8F\xB1", # CHEROKEE SMALL LETTER YI
733							"\xE1\x8F\xBA" => "\xE1\x8F\xB2", # CHEROKEE SMALL LETTER YO
734							"\xE1\x8F\xBB" => "\xE1\x8F\xB3", # CHEROKEE SMALL LETTER YU
735							"\xE1\x8F\xBC" => "\xE1\x8F\xB4", # CHEROKEE SMALL LETTER YV
736							"\xE1\x8F\xBD" => "\xE1\x8F\xB5", # CHEROKEE SMALL LETTER MV
737							"\xE1\xB2\x80" => "\xD0\xB2", # CYRILLIC SMALL LETTER ROUNDED VE
738							"\xE1\xB2\x81" => "\xD0\xB4", # CYRILLIC SMALL LETTER LONG-LEGGED DE
739							"\xE1\xB2\x82" => "\xD0\xBE", # CYRILLIC SMALL LETTER NARROW O
740							"\xE1\xB2\x83" => "\xD1\x81", # CYRILLIC SMALL LETTER WIDE ES
741							"\xE1\xB2\x84" => "\xD1\x82", # CYRILLIC SMALL LETTER TALL TE
742							"\xE1\xB2\x85" => "\xD1\x82", # CYRILLIC SMALL LETTER THREE-LEGGED TE
743							"\xE1\xB2\x86" => "\xD1\x8A", # CYRILLIC SMALL LETTER TALL HARD SIGN
744							"\xE1\xB2\x87" => "\xD1\xA3", # CYRILLIC SMALL LETTER TALL YAT
745							"\xE1\xB2\x88" => "\xEA\x99\x8B", # CYRILLIC SMALL LETTER UNBLENDED UK
746							"\xE1\xB8\x80" => "\xE1\xB8\x81", # LATIN CAPITAL LETTER A WITH RING BELOW
747							"\xE1\xB8\x82" => "\xE1\xB8\x83", # LATIN CAPITAL LETTER B WITH DOT ABOVE
748							"\xE1\xB8\x84" => "\xE1\xB8\x85", # LATIN CAPITAL LETTER B WITH DOT BELOW
749							"\xE1\xB8\x86" => "\xE1\xB8\x87", # LATIN CAPITAL LETTER B WITH LINE BELOW
750							"\xE1\xB8\x88" => "\xE1\xB8\x89", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
751							"\xE1\xB8\x8A" => "\xE1\xB8\x8B", # LATIN CAPITAL LETTER D WITH DOT ABOVE
752							"\xE1\xB8\x8C" => "\xE1\xB8\x8D", # LATIN CAPITAL LETTER D WITH DOT BELOW
753							"\xE1\xB8\x8E" => "\xE1\xB8\x8F", # LATIN CAPITAL LETTER D WITH LINE BELOW
754							"\xE1\xB8\x90" => "\xE1\xB8\x91", # LATIN CAPITAL LETTER D WITH CEDILLA
755							"\xE1\xB8\x92" => "\xE1\xB8\x93", # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
756							"\xE1\xB8\x94" => "\xE1\xB8\x95", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
757							"\xE1\xB8\x96" => "\xE1\xB8\x97", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
758							"\xE1\xB8\x98" => "\xE1\xB8\x99", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
759							"\xE1\xB8\x9A" => "\xE1\xB8\x9B", # LATIN CAPITAL LETTER E WITH TILDE BELOW
760							"\xE1\xB8\x9C" => "\xE1\xB8\x9D", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
761							"\xE1\xB8\x9E" => "\xE1\xB8\x9F", # LATIN CAPITAL LETTER F WITH DOT ABOVE
762							"\xE1\xB8\xA0" => "\xE1\xB8\xA1", # LATIN CAPITAL LETTER G WITH MACRON
763							"\xE1\xB8\xA2" => "\xE1\xB8\xA3", # LATIN CAPITAL LETTER H WITH DOT ABOVE
764							"\xE1\xB8\xA4" => "\xE1\xB8\xA5", # LATIN CAPITAL LETTER H WITH DOT BELOW
765							"\xE1\xB8\xA6" => "\xE1\xB8\xA7", # LATIN CAPITAL LETTER H WITH DIAERESIS
766							"\xE1\xB8\xA8" => "\xE1\xB8\xA9", # LATIN CAPITAL LETTER H WITH CEDILLA
767							"\xE1\xB8\xAA" => "\xE1\xB8\xAB", # LATIN CAPITAL LETTER H WITH BREVE BELOW
768							"\xE1\xB8\xAC" => "\xE1\xB8\xAD", # LATIN CAPITAL LETTER I WITH TILDE BELOW
769							"\xE1\xB8\xAE" => "\xE1\xB8\xAF", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
770							"\xE1\xB8\xB0" => "\xE1\xB8\xB1", # LATIN CAPITAL LETTER K WITH ACUTE
771							"\xE1\xB8\xB2" => "\xE1\xB8\xB3", # LATIN CAPITAL LETTER K WITH DOT BELOW
772							"\xE1\xB8\xB4" => "\xE1\xB8\xB5", # LATIN CAPITAL LETTER K WITH LINE BELOW
773							"\xE1\xB8\xB6" => "\xE1\xB8\xB7", # LATIN CAPITAL LETTER L WITH DOT BELOW
774							"\xE1\xB8\xB8" => "\xE1\xB8\xB9", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
775							"\xE1\xB8\xBA" => "\xE1\xB8\xBB", # LATIN CAPITAL LETTER L WITH LINE BELOW
776							"\xE1\xB8\xBC" => "\xE1\xB8\xBD", # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
777							"\xE1\xB8\xBE" => "\xE1\xB8\xBF", # LATIN CAPITAL LETTER M WITH ACUTE
778							"\xE1\xB9\x80" => "\xE1\xB9\x81", # LATIN CAPITAL LETTER M WITH DOT ABOVE
779							"\xE1\xB9\x82" => "\xE1\xB9\x83", # LATIN CAPITAL LETTER M WITH DOT BELOW
780							"\xE1\xB9\x84" => "\xE1\xB9\x85", # LATIN CAPITAL LETTER N WITH DOT ABOVE
781							"\xE1\xB9\x86" => "\xE1\xB9\x87", # LATIN CAPITAL LETTER N WITH DOT BELOW
782							"\xE1\xB9\x88" => "\xE1\xB9\x89", # LATIN CAPITAL LETTER N WITH LINE BELOW
783							"\xE1\xB9\x8A" => "\xE1\xB9\x8B", # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
784							"\xE1\xB9\x8C" => "\xE1\xB9\x8D", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
785							"\xE1\xB9\x8E" => "\xE1\xB9\x8F", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
786							"\xE1\xB9\x90" => "\xE1\xB9\x91", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
787							"\xE1\xB9\x92" => "\xE1\xB9\x93", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
788							"\xE1\xB9\x94" => "\xE1\xB9\x95", # LATIN CAPITAL LETTER P WITH ACUTE
789							"\xE1\xB9\x96" => "\xE1\xB9\x97", # LATIN CAPITAL LETTER P WITH DOT ABOVE
790							"\xE1\xB9\x98" => "\xE1\xB9\x99", # LATIN CAPITAL LETTER R WITH DOT ABOVE
791							"\xE1\xB9\x9A" => "\xE1\xB9\x9B", # LATIN CAPITAL LETTER R WITH DOT BELOW
792							"\xE1\xB9\x9C" => "\xE1\xB9\x9D", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
793							"\xE1\xB9\x9E" => "\xE1\xB9\x9F", # LATIN CAPITAL LETTER R WITH LINE BELOW
794							"\xE1\xB9\xA0" => "\xE1\xB9\xA1", # LATIN CAPITAL LETTER S WITH DOT ABOVE
795							"\xE1\xB9\xA2" => "\xE1\xB9\xA3", # LATIN CAPITAL LETTER S WITH DOT BELOW
796							"\xE1\xB9\xA4" => "\xE1\xB9\xA5", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
797							"\xE1\xB9\xA6" => "\xE1\xB9\xA7", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
798							"\xE1\xB9\xA8" => "\xE1\xB9\xA9", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
799							"\xE1\xB9\xAA" => "\xE1\xB9\xAB", # LATIN CAPITAL LETTER T WITH DOT ABOVE
800							"\xE1\xB9\xAC" => "\xE1\xB9\xAD", # LATIN CAPITAL LETTER T WITH DOT BELOW
801							"\xE1\xB9\xAE" => "\xE1\xB9\xAF", # LATIN CAPITAL LETTER T WITH LINE BELOW
802							"\xE1\xB9\xB0" => "\xE1\xB9\xB1", # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
803							"\xE1\xB9\xB2" => "\xE1\xB9\xB3", # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
804							"\xE1\xB9\xB4" => "\xE1\xB9\xB5", # LATIN CAPITAL LETTER U WITH TILDE BELOW
805							"\xE1\xB9\xB6" => "\xE1\xB9\xB7", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
806							"\xE1\xB9\xB8" => "\xE1\xB9\xB9", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
807							"\xE1\xB9\xBA" => "\xE1\xB9\xBB", # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
808							"\xE1\xB9\xBC" => "\xE1\xB9\xBD", # LATIN CAPITAL LETTER V WITH TILDE
809							"\xE1\xB9\xBE" => "\xE1\xB9\xBF", # LATIN CAPITAL LETTER V WITH DOT BELOW
810							"\xE1\xBA\x80" => "\xE1\xBA\x81", # LATIN CAPITAL LETTER W WITH GRAVE
811							"\xE1\xBA\x82" => "\xE1\xBA\x83", # LATIN CAPITAL LETTER W WITH ACUTE
812							"\xE1\xBA\x84" => "\xE1\xBA\x85", # LATIN CAPITAL LETTER W WITH DIAERESIS
813							"\xE1\xBA\x86" => "\xE1\xBA\x87", # LATIN CAPITAL LETTER W WITH DOT ABOVE
814							"\xE1\xBA\x88" => "\xE1\xBA\x89", # LATIN CAPITAL LETTER W WITH DOT BELOW
815							"\xE1\xBA\x8A" => "\xE1\xBA\x8B", # LATIN CAPITAL LETTER X WITH DOT ABOVE
816							"\xE1\xBA\x8C" => "\xE1\xBA\x8D", # LATIN CAPITAL LETTER X WITH DIAERESIS
817							"\xE1\xBA\x8E" => "\xE1\xBA\x8F", # LATIN CAPITAL LETTER Y WITH DOT ABOVE
818							"\xE1\xBA\x90" => "\xE1\xBA\x91", # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
819							"\xE1\xBA\x92" => "\xE1\xBA\x93", # LATIN CAPITAL LETTER Z WITH DOT BELOW
820							"\xE1\xBA\x94" => "\xE1\xBA\x95", # LATIN CAPITAL LETTER Z WITH LINE BELOW
821							"\xE1\xBA\x96" => "\x68\xCC\xB1", # LATIN SMALL LETTER H WITH LINE BELOW
822							"\xE1\xBA\x97" => "\x74\xCC\x88", # LATIN SMALL LETTER T WITH DIAERESIS
823							"\xE1\xBA\x98" => "\x77\xCC\x8A", # LATIN SMALL LETTER W WITH RING ABOVE
824							"\xE1\xBA\x99" => "\x79\xCC\x8A", # LATIN SMALL LETTER Y WITH RING ABOVE
825							"\xE1\xBA\x9A" => "\x61\xCA\xBE", # LATIN SMALL LETTER A WITH RIGHT HALF RING
826							"\xE1\xBA\x9B" => "\xE1\xB9\xA1", # LATIN SMALL LETTER LONG S WITH DOT ABOVE
827							"\xE1\xBA\x9E" => "\x73\x73", # LATIN CAPITAL LETTER SHARP S
828							"\xE1\xBA\xA0" => "\xE1\xBA\xA1", # LATIN CAPITAL LETTER A WITH DOT BELOW
829							"\xE1\xBA\xA2" => "\xE1\xBA\xA3", # LATIN CAPITAL LETTER A WITH HOOK ABOVE
830							"\xE1\xBA\xA4" => "\xE1\xBA\xA5", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
831							"\xE1\xBA\xA6" => "\xE1\xBA\xA7", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
832							"\xE1\xBA\xA8" => "\xE1\xBA\xA9", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
833							"\xE1\xBA\xAA" => "\xE1\xBA\xAB", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
834							"\xE1\xBA\xAC" => "\xE1\xBA\xAD", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
835							"\xE1\xBA\xAE" => "\xE1\xBA\xAF", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
836							"\xE1\xBA\xB0" => "\xE1\xBA\xB1", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
837							"\xE1\xBA\xB2" => "\xE1\xBA\xB3", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
838							"\xE1\xBA\xB4" => "\xE1\xBA\xB5", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
839							"\xE1\xBA\xB6" => "\xE1\xBA\xB7", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
840							"\xE1\xBA\xB8" => "\xE1\xBA\xB9", # LATIN CAPITAL LETTER E WITH DOT BELOW
841							"\xE1\xBA\xBA" => "\xE1\xBA\xBB", # LATIN CAPITAL LETTER E WITH HOOK ABOVE
842							"\xE1\xBA\xBC" => "\xE1\xBA\xBD", # LATIN CAPITAL LETTER E WITH TILDE
843							"\xE1\xBA\xBE" => "\xE1\xBA\xBF", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
844							"\xE1\xBB\x80" => "\xE1\xBB\x81", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
845							"\xE1\xBB\x82" => "\xE1\xBB\x83", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
846							"\xE1\xBB\x84" => "\xE1\xBB\x85", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
847							"\xE1\xBB\x86" => "\xE1\xBB\x87", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
848							"\xE1\xBB\x88" => "\xE1\xBB\x89", # LATIN CAPITAL LETTER I WITH HOOK ABOVE
849							"\xE1\xBB\x8A" => "\xE1\xBB\x8B", # LATIN CAPITAL LETTER I WITH DOT BELOW
850							"\xE1\xBB\x8C" => "\xE1\xBB\x8D", # LATIN CAPITAL LETTER O WITH DOT BELOW
851							"\xE1\xBB\x8E" => "\xE1\xBB\x8F", # LATIN CAPITAL LETTER O WITH HOOK ABOVE
852							"\xE1\xBB\x90" => "\xE1\xBB\x91", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
853							"\xE1\xBB\x92" => "\xE1\xBB\x93", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
854							"\xE1\xBB\x94" => "\xE1\xBB\x95", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
855							"\xE1\xBB\x96" => "\xE1\xBB\x97", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
856							"\xE1\xBB\x98" => "\xE1\xBB\x99", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
857							"\xE1\xBB\x9A" => "\xE1\xBB\x9B", # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
858							"\xE1\xBB\x9C" => "\xE1\xBB\x9D", # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
859							"\xE1\xBB\x9E" => "\xE1\xBB\x9F", # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
860							"\xE1\xBB\xA0" => "\xE1\xBB\xA1", # LATIN CAPITAL LETTER O WITH HORN AND TILDE
861							"\xE1\xBB\xA2" => "\xE1\xBB\xA3", # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
862							"\xE1\xBB\xA4" => "\xE1\xBB\xA5", # LATIN CAPITAL LETTER U WITH DOT BELOW
863							"\xE1\xBB\xA6" => "\xE1\xBB\xA7", # LATIN CAPITAL LETTER U WITH HOOK ABOVE
864							"\xE1\xBB\xA8" => "\xE1\xBB\xA9", # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
865							"\xE1\xBB\xAA" => "\xE1\xBB\xAB", # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
866							"\xE1\xBB\xAC" => "\xE1\xBB\xAD", # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
867							"\xE1\xBB\xAE" => "\xE1\xBB\xAF", # LATIN CAPITAL LETTER U WITH HORN AND TILDE
868							"\xE1\xBB\xB0" => "\xE1\xBB\xB1", # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
869							"\xE1\xBB\xB2" => "\xE1\xBB\xB3", # LATIN CAPITAL LETTER Y WITH GRAVE
870							"\xE1\xBB\xB4" => "\xE1\xBB\xB5", # LATIN CAPITAL LETTER Y WITH DOT BELOW
871							"\xE1\xBB\xB6" => "\xE1\xBB\xB7", # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
872							"\xE1\xBB\xB8" => "\xE1\xBB\xB9", # LATIN CAPITAL LETTER Y WITH TILDE
873							"\xE1\xBB\xBA" => "\xE1\xBB\xBB", # LATIN CAPITAL LETTER MIDDLE-WELSH LL
874							"\xE1\xBB\xBC" => "\xE1\xBB\xBD", # LATIN CAPITAL LETTER MIDDLE-WELSH V
875							"\xE1\xBB\xBE" => "\xE1\xBB\xBF", # LATIN CAPITAL LETTER Y WITH LOOP
876							"\xE1\xBC\x88" => "\xE1\xBC\x80", # GREEK CAPITAL LETTER ALPHA WITH PSILI
877							"\xE1\xBC\x89" => "\xE1\xBC\x81", # GREEK CAPITAL LETTER ALPHA WITH DASIA
878							"\xE1\xBC\x8A" => "\xE1\xBC\x82", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
879							"\xE1\xBC\x8B" => "\xE1\xBC\x83", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
880							"\xE1\xBC\x8C" => "\xE1\xBC\x84", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
881							"\xE1\xBC\x8D" => "\xE1\xBC\x85", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
882							"\xE1\xBC\x8E" => "\xE1\xBC\x86", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
883							"\xE1\xBC\x8F" => "\xE1\xBC\x87", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
884							"\xE1\xBC\x98" => "\xE1\xBC\x90", # GREEK CAPITAL LETTER EPSILON WITH PSILI
885							"\xE1\xBC\x99" => "\xE1\xBC\x91", # GREEK CAPITAL LETTER EPSILON WITH DASIA
886							"\xE1\xBC\x9A" => "\xE1\xBC\x92", # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
887							"\xE1\xBC\x9B" => "\xE1\xBC\x93", # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
888							"\xE1\xBC\x9C" => "\xE1\xBC\x94", # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
889							"\xE1\xBC\x9D" => "\xE1\xBC\x95", # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
890							"\xE1\xBC\xA8" => "\xE1\xBC\xA0", # GREEK CAPITAL LETTER ETA WITH PSILI
891							"\xE1\xBC\xA9" => "\xE1\xBC\xA1", # GREEK CAPITAL LETTER ETA WITH DASIA
892							"\xE1\xBC\xAA" => "\xE1\xBC\xA2", # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
893							"\xE1\xBC\xAB" => "\xE1\xBC\xA3", # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
894							"\xE1\xBC\xAC" => "\xE1\xBC\xA4", # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
895							"\xE1\xBC\xAD" => "\xE1\xBC\xA5", # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
896							"\xE1\xBC\xAE" => "\xE1\xBC\xA6", # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
897							"\xE1\xBC\xAF" => "\xE1\xBC\xA7", # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
898							"\xE1\xBC\xB8" => "\xE1\xBC\xB0", # GREEK CAPITAL LETTER IOTA WITH PSILI
899							"\xE1\xBC\xB9" => "\xE1\xBC\xB1", # GREEK CAPITAL LETTER IOTA WITH DASIA
900							"\xE1\xBC\xBA" => "\xE1\xBC\xB2", # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
901							"\xE1\xBC\xBB" => "\xE1\xBC\xB3", # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
902							"\xE1\xBC\xBC" => "\xE1\xBC\xB4", # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
903							"\xE1\xBC\xBD" => "\xE1\xBC\xB5", # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
904							"\xE1\xBC\xBE" => "\xE1\xBC\xB6", # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
905							"\xE1\xBC\xBF" => "\xE1\xBC\xB7", # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
906							"\xE1\xBD\x88" => "\xE1\xBD\x80", # GREEK CAPITAL LETTER OMICRON WITH PSILI
907							"\xE1\xBD\x89" => "\xE1\xBD\x81", # GREEK CAPITAL LETTER OMICRON WITH DASIA
908							"\xE1\xBD\x8A" => "\xE1\xBD\x82", # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
909							"\xE1\xBD\x8B" => "\xE1\xBD\x83", # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
910							"\xE1\xBD\x8C" => "\xE1\xBD\x84", # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
911							"\xE1\xBD\x8D" => "\xE1\xBD\x85", # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
912							"\xE1\xBD\x90" => "\xCF\x85\xCC\x93", # GREEK SMALL LETTER UPSILON WITH PSILI
913							"\xE1\xBD\x92" => "\xCF\x85\xCC\x93\xCC\x80", # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
914							"\xE1\xBD\x94" => "\xCF\x85\xCC\x93\xCC\x81", # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
915							"\xE1\xBD\x96" => "\xCF\x85\xCC\x93\xCD\x82", # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
916							"\xE1\xBD\x99" => "\xE1\xBD\x91", # GREEK CAPITAL LETTER UPSILON WITH DASIA
917							"\xE1\xBD\x9B" => "\xE1\xBD\x93", # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
918							"\xE1\xBD\x9D" => "\xE1\xBD\x95", # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
919							"\xE1\xBD\x9F" => "\xE1\xBD\x97", # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
920							"\xE1\xBD\xA8" => "\xE1\xBD\xA0", # GREEK CAPITAL LETTER OMEGA WITH PSILI
921							"\xE1\xBD\xA9" => "\xE1\xBD\xA1", # GREEK CAPITAL LETTER OMEGA WITH DASIA
922							"\xE1\xBD\xAA" => "\xE1\xBD\xA2", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
923							"\xE1\xBD\xAB" => "\xE1\xBD\xA3", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
924							"\xE1\xBD\xAC" => "\xE1\xBD\xA4", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
925							"\xE1\xBD\xAD" => "\xE1\xBD\xA5", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
926							"\xE1\xBD\xAE" => "\xE1\xBD\xA6", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
927							"\xE1\xBD\xAF" => "\xE1\xBD\xA7", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
928							"\xE1\xBE\x80" => "\xE1\xBC\x80\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
929							"\xE1\xBE\x81" => "\xE1\xBC\x81\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
930							"\xE1\xBE\x82" => "\xE1\xBC\x82\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
931							"\xE1\xBE\x83" => "\xE1\xBC\x83\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
932							"\xE1\xBE\x84" => "\xE1\xBC\x84\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
933							"\xE1\xBE\x85" => "\xE1\xBC\x85\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
934							"\xE1\xBE\x86" => "\xE1\xBC\x86\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
935							"\xE1\xBE\x87" => "\xE1\xBC\x87\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
936							"\xE1\xBE\x88" => "\xE1\xBC\x80\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
937							"\xE1\xBE\x89" => "\xE1\xBC\x81\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
938							"\xE1\xBE\x8A" => "\xE1\xBC\x82\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
939							"\xE1\xBE\x8B" => "\xE1\xBC\x83\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
940							"\xE1\xBE\x8C" => "\xE1\xBC\x84\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
941							"\xE1\xBE\x8D" => "\xE1\xBC\x85\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
942							"\xE1\xBE\x8E" => "\xE1\xBC\x86\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
943							"\xE1\xBE\x8F" => "\xE1\xBC\x87\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
944							"\xE1\xBE\x90" => "\xE1\xBC\xA0\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
945							"\xE1\xBE\x91" => "\xE1\xBC\xA1\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
946							"\xE1\xBE\x92" => "\xE1\xBC\xA2\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
947							"\xE1\xBE\x93" => "\xE1\xBC\xA3\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
948							"\xE1\xBE\x94" => "\xE1\xBC\xA4\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
949							"\xE1\xBE\x95" => "\xE1\xBC\xA5\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
950							"\xE1\xBE\x96" => "\xE1\xBC\xA6\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
951							"\xE1\xBE\x97" => "\xE1\xBC\xA7\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
952							"\xE1\xBE\x98" => "\xE1\xBC\xA0\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
953							"\xE1\xBE\x99" => "\xE1\xBC\xA1\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
954							"\xE1\xBE\x9A" => "\xE1\xBC\xA2\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
955							"\xE1\xBE\x9B" => "\xE1\xBC\xA3\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
956							"\xE1\xBE\x9C" => "\xE1\xBC\xA4\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
957							"\xE1\xBE\x9D" => "\xE1\xBC\xA5\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
958							"\xE1\xBE\x9E" => "\xE1\xBC\xA6\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
959							"\xE1\xBE\x9F" => "\xE1\xBC\xA7\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
960							"\xE1\xBE\xA0" => "\xE1\xBD\xA0\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
961							"\xE1\xBE\xA1" => "\xE1\xBD\xA1\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
962							"\xE1\xBE\xA2" => "\xE1\xBD\xA2\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
963							"\xE1\xBE\xA3" => "\xE1\xBD\xA3\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
964							"\xE1\xBE\xA4" => "\xE1\xBD\xA4\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
965							"\xE1\xBE\xA5" => "\xE1\xBD\xA5\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
966							"\xE1\xBE\xA6" => "\xE1\xBD\xA6\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
967							"\xE1\xBE\xA7" => "\xE1\xBD\xA7\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
968							"\xE1\xBE\xA8" => "\xE1\xBD\xA0\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
969							"\xE1\xBE\xA9" => "\xE1\xBD\xA1\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
970							"\xE1\xBE\xAA" => "\xE1\xBD\xA2\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
971							"\xE1\xBE\xAB" => "\xE1\xBD\xA3\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
972							"\xE1\xBE\xAC" => "\xE1\xBD\xA4\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
973							"\xE1\xBE\xAD" => "\xE1\xBD\xA5\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
974							"\xE1\xBE\xAE" => "\xE1\xBD\xA6\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
975							"\xE1\xBE\xAF" => "\xE1\xBD\xA7\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
976							"\xE1\xBE\xB2" => "\xE1\xBD\xB0\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
977							"\xE1\xBE\xB3" => "\xCE\xB1\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
978							"\xE1\xBE\xB4" => "\xCE\xAC\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
979							"\xE1\xBE\xB6" => "\xCE\xB1\xCD\x82", # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
980							"\xE1\xBE\xB7" => "\xCE\xB1\xCD\x82\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
981							"\xE1\xBE\xB8" => "\xE1\xBE\xB0", # GREEK CAPITAL LETTER ALPHA WITH VRACHY
982							"\xE1\xBE\xB9" => "\xE1\xBE\xB1", # GREEK CAPITAL LETTER ALPHA WITH MACRON
983							"\xE1\xBE\xBA" => "\xE1\xBD\xB0", # GREEK CAPITAL LETTER ALPHA WITH VARIA
984							"\xE1\xBE\xBB" => "\xE1\xBD\xB1", # GREEK CAPITAL LETTER ALPHA WITH OXIA
985							"\xE1\xBE\xBC" => "\xCE\xB1\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
986							"\xE1\xBE\xBE" => "\xCE\xB9", # GREEK PROSGEGRAMMENI
987							"\xE1\xBF\x82" => "\xE1\xBD\xB4\xCE\xB9", # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
988							"\xE1\xBF\x83" => "\xCE\xB7\xCE\xB9", # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
989							"\xE1\xBF\x84" => "\xCE\xAE\xCE\xB9", # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
990							"\xE1\xBF\x86" => "\xCE\xB7\xCD\x82", # GREEK SMALL LETTER ETA WITH PERISPOMENI
991							"\xE1\xBF\x87" => "\xCE\xB7\xCD\x82\xCE\xB9", # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
992							"\xE1\xBF\x88" => "\xE1\xBD\xB2", # GREEK CAPITAL LETTER EPSILON WITH VARIA
993							"\xE1\xBF\x89" => "\xE1\xBD\xB3", # GREEK CAPITAL LETTER EPSILON WITH OXIA
994							"\xE1\xBF\x8A" => "\xE1\xBD\xB4", # GREEK CAPITAL LETTER ETA WITH VARIA
995							"\xE1\xBF\x8B" => "\xE1\xBD\xB5", # GREEK CAPITAL LETTER ETA WITH OXIA
996							"\xE1\xBF\x8C" => "\xCE\xB7\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
997							"\xE1\xBF\x92" => "\xCE\xB9\xCC\x88\xCC\x80", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
998							"\xE1\xBF\x93" => "\xCE\xB9\xCC\x88\xCC\x81", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
999							"\xE1\xBF\x96" => "\xCE\xB9\xCD\x82", # GREEK SMALL LETTER IOTA WITH PERISPOMENI
1000							"\xE1\xBF\x97" => "\xCE\xB9\xCC\x88\xCD\x82", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
1001							"\xE1\xBF\x98" => "\xE1\xBF\x90", # GREEK CAPITAL LETTER IOTA WITH VRACHY
1002							"\xE1\xBF\x99" => "\xE1\xBF\x91", # GREEK CAPITAL LETTER IOTA WITH MACRON
1003							"\xE1\xBF\x9A" => "\xE1\xBD\xB6", # GREEK CAPITAL LETTER IOTA WITH VARIA
1004							"\xE1\xBF\x9B" => "\xE1\xBD\xB7", # GREEK CAPITAL LETTER IOTA WITH OXIA
1005							"\xE1\xBF\xA2" => "\xCF\x85\xCC\x88\xCC\x80", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
1006							"\xE1\xBF\xA3" => "\xCF\x85\xCC\x88\xCC\x81", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
1007							"\xE1\xBF\xA4" => "\xCF\x81\xCC\x93", # GREEK SMALL LETTER RHO WITH PSILI
1008							"\xE1\xBF\xA6" => "\xCF\x85\xCD\x82", # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
1009							"\xE1\xBF\xA7" => "\xCF\x85\xCC\x88\xCD\x82", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
1010							"\xE1\xBF\xA8" => "\xE1\xBF\xA0", # GREEK CAPITAL LETTER UPSILON WITH VRACHY
1011							"\xE1\xBF\xA9" => "\xE1\xBF\xA1", # GREEK CAPITAL LETTER UPSILON WITH MACRON
1012							"\xE1\xBF\xAA" => "\xE1\xBD\xBA", # GREEK CAPITAL LETTER UPSILON WITH VARIA
1013							"\xE1\xBF\xAB" => "\xE1\xBD\xBB", # GREEK CAPITAL LETTER UPSILON WITH OXIA
1014							"\xE1\xBF\xAC" => "\xE1\xBF\xA5", # GREEK CAPITAL LETTER RHO WITH DASIA
1015							"\xE1\xBF\xB2" => "\xE1\xBD\xBC\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1016							"\xE1\xBF\xB3" => "\xCF\x89\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1017							"\xE1\xBF\xB4" => "\xCF\x8E\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1018							"\xE1\xBF\xB6" => "\xCF\x89\xCD\x82", # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
1019							"\xE1\xBF\xB7" => "\xCF\x89\xCD\x82\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
1020							"\xE1\xBF\xB8" => "\xE1\xBD\xB8", # GREEK CAPITAL LETTER OMICRON WITH VARIA
1021							"\xE1\xBF\xB9" => "\xE1\xBD\xB9", # GREEK CAPITAL LETTER OMICRON WITH OXIA
1022							"\xE1\xBF\xBA" => "\xE1\xBD\xBC", # GREEK CAPITAL LETTER OMEGA WITH VARIA
1023							"\xE1\xBF\xBB" => "\xE1\xBD\xBD", # GREEK CAPITAL LETTER OMEGA WITH OXIA
1024							"\xE1\xBF\xBC" => "\xCF\x89\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1025							"\xE2\x84\xA6" => "\xCF\x89", # OHM SIGN
1026							"\xE2\x84\xAA" => "\x6B", # KELVIN SIGN
1027							"\xE2\x84\xAB" => "\xC3\xA5", # ANGSTROM SIGN
1028							"\xE2\x84\xB2" => "\xE2\x85\x8E", # TURNED CAPITAL F
1029							"\xE2\x85\xA0" => "\xE2\x85\xB0", # ROMAN NUMERAL ONE
1030							"\xE2\x85\xA1" => "\xE2\x85\xB1", # ROMAN NUMERAL TWO
1031							"\xE2\x85\xA2" => "\xE2\x85\xB2", # ROMAN NUMERAL THREE
1032							"\xE2\x85\xA3" => "\xE2\x85\xB3", # ROMAN NUMERAL FOUR
1033							"\xE2\x85\xA4" => "\xE2\x85\xB4", # ROMAN NUMERAL FIVE
1034							"\xE2\x85\xA5" => "\xE2\x85\xB5", # ROMAN NUMERAL SIX
1035							"\xE2\x85\xA6" => "\xE2\x85\xB6", # ROMAN NUMERAL SEVEN
1036							"\xE2\x85\xA7" => "\xE2\x85\xB7", # ROMAN NUMERAL EIGHT
1037							"\xE2\x85\xA8" => "\xE2\x85\xB8", # ROMAN NUMERAL NINE
1038							"\xE2\x85\xA9" => "\xE2\x85\xB9", # ROMAN NUMERAL TEN
1039							"\xE2\x85\xAA" => "\xE2\x85\xBA", # ROMAN NUMERAL ELEVEN
1040							"\xE2\x85\xAB" => "\xE2\x85\xBB", # ROMAN NUMERAL TWELVE
1041							"\xE2\x85\xAC" => "\xE2\x85\xBC", # ROMAN NUMERAL FIFTY
1042							"\xE2\x85\xAD" => "\xE2\x85\xBD", # ROMAN NUMERAL ONE HUNDRED
1043							"\xE2\x85\xAE" => "\xE2\x85\xBE", # ROMAN NUMERAL FIVE HUNDRED
1044							"\xE2\x85\xAF" => "\xE2\x85\xBF", # ROMAN NUMERAL ONE THOUSAND
1045							"\xE2\x86\x83" => "\xE2\x86\x84", # ROMAN NUMERAL REVERSED ONE HUNDRED
1046							"\xE2\x92\xB6" => "\xE2\x93\x90", # CIRCLED LATIN CAPITAL LETTER A
1047							"\xE2\x92\xB7" => "\xE2\x93\x91", # CIRCLED LATIN CAPITAL LETTER B
1048							"\xE2\x92\xB8" => "\xE2\x93\x92", # CIRCLED LATIN CAPITAL LETTER C
1049							"\xE2\x92\xB9" => "\xE2\x93\x93", # CIRCLED LATIN CAPITAL LETTER D
1050							"\xE2\x92\xBA" => "\xE2\x93\x94", # CIRCLED LATIN CAPITAL LETTER E
1051							"\xE2\x92\xBB" => "\xE2\x93\x95", # CIRCLED LATIN CAPITAL LETTER F
1052							"\xE2\x92\xBC" => "\xE2\x93\x96", # CIRCLED LATIN CAPITAL LETTER G
1053							"\xE2\x92\xBD" => "\xE2\x93\x97", # CIRCLED LATIN CAPITAL LETTER H
1054							"\xE2\x92\xBE" => "\xE2\x93\x98", # CIRCLED LATIN CAPITAL LETTER I
1055							"\xE2\x92\xBF" => "\xE2\x93\x99", # CIRCLED LATIN CAPITAL LETTER J
1056							"\xE2\x93\x80" => "\xE2\x93\x9A", # CIRCLED LATIN CAPITAL LETTER K
1057							"\xE2\x93\x81" => "\xE2\x93\x9B", # CIRCLED LATIN CAPITAL LETTER L
1058							"\xE2\x93\x82" => "\xE2\x93\x9C", # CIRCLED LATIN CAPITAL LETTER M
1059							"\xE2\x93\x83" => "\xE2\x93\x9D", # CIRCLED LATIN CAPITAL LETTER N
1060							"\xE2\x93\x84" => "\xE2\x93\x9E", # CIRCLED LATIN CAPITAL LETTER O
1061							"\xE2\x93\x85" => "\xE2\x93\x9F", # CIRCLED LATIN CAPITAL LETTER P
1062							"\xE2\x93\x86" => "\xE2\x93\xA0", # CIRCLED LATIN CAPITAL LETTER Q
1063							"\xE2\x93\x87" => "\xE2\x93\xA1", # CIRCLED LATIN CAPITAL LETTER R
1064							"\xE2\x93\x88" => "\xE2\x93\xA2", # CIRCLED LATIN CAPITAL LETTER S
1065							"\xE2\x93\x89" => "\xE2\x93\xA3", # CIRCLED LATIN CAPITAL LETTER T
1066							"\xE2\x93\x8A" => "\xE2\x93\xA4", # CIRCLED LATIN CAPITAL LETTER U
1067							"\xE2\x93\x8B" => "\xE2\x93\xA5", # CIRCLED LATIN CAPITAL LETTER V
1068							"\xE2\x93\x8C" => "\xE2\x93\xA6", # CIRCLED LATIN CAPITAL LETTER W
1069							"\xE2\x93\x8D" => "\xE2\x93\xA7", # CIRCLED LATIN CAPITAL LETTER X
1070							"\xE2\x93\x8E" => "\xE2\x93\xA8", # CIRCLED LATIN CAPITAL LETTER Y
1071							"\xE2\x93\x8F" => "\xE2\x93\xA9", # CIRCLED LATIN CAPITAL LETTER Z
1072							"\xE2\xB0\x80" => "\xE2\xB0\xB0", # GLAGOLITIC CAPITAL LETTER AZU
1073							"\xE2\xB0\x81" => "\xE2\xB0\xB1", # GLAGOLITIC CAPITAL LETTER BUKY
1074							"\xE2\xB0\x82" => "\xE2\xB0\xB2", # GLAGOLITIC CAPITAL LETTER VEDE
1075							"\xE2\xB0\x83" => "\xE2\xB0\xB3", # GLAGOLITIC CAPITAL LETTER GLAGOLI
1076							"\xE2\xB0\x84" => "\xE2\xB0\xB4", # GLAGOLITIC CAPITAL LETTER DOBRO
1077							"\xE2\xB0\x85" => "\xE2\xB0\xB5", # GLAGOLITIC CAPITAL LETTER YESTU
1078							"\xE2\xB0\x86" => "\xE2\xB0\xB6", # GLAGOLITIC CAPITAL LETTER ZHIVETE
1079							"\xE2\xB0\x87" => "\xE2\xB0\xB7", # GLAGOLITIC CAPITAL LETTER DZELO
1080							"\xE2\xB0\x88" => "\xE2\xB0\xB8", # GLAGOLITIC CAPITAL LETTER ZEMLJA
1081							"\xE2\xB0\x89" => "\xE2\xB0\xB9", # GLAGOLITIC CAPITAL LETTER IZHE
1082							"\xE2\xB0\x8A" => "\xE2\xB0\xBA", # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
1083							"\xE2\xB0\x8B" => "\xE2\xB0\xBB", # GLAGOLITIC CAPITAL LETTER I
1084							"\xE2\xB0\x8C" => "\xE2\xB0\xBC", # GLAGOLITIC CAPITAL LETTER DJERVI
1085							"\xE2\xB0\x8D" => "\xE2\xB0\xBD", # GLAGOLITIC CAPITAL LETTER KAKO
1086							"\xE2\xB0\x8E" => "\xE2\xB0\xBE", # GLAGOLITIC CAPITAL LETTER LJUDIJE
1087							"\xE2\xB0\x8F" => "\xE2\xB0\xBF", # GLAGOLITIC CAPITAL LETTER MYSLITE
1088							"\xE2\xB0\x90" => "\xE2\xB1\x80", # GLAGOLITIC CAPITAL LETTER NASHI
1089							"\xE2\xB0\x91" => "\xE2\xB1\x81", # GLAGOLITIC CAPITAL LETTER ONU
1090							"\xE2\xB0\x92" => "\xE2\xB1\x82", # GLAGOLITIC CAPITAL LETTER POKOJI
1091							"\xE2\xB0\x93" => "\xE2\xB1\x83", # GLAGOLITIC CAPITAL LETTER RITSI
1092							"\xE2\xB0\x94" => "\xE2\xB1\x84", # GLAGOLITIC CAPITAL LETTER SLOVO
1093							"\xE2\xB0\x95" => "\xE2\xB1\x85", # GLAGOLITIC CAPITAL LETTER TVRIDO
1094							"\xE2\xB0\x96" => "\xE2\xB1\x86", # GLAGOLITIC CAPITAL LETTER UKU
1095							"\xE2\xB0\x97" => "\xE2\xB1\x87", # GLAGOLITIC CAPITAL LETTER FRITU
1096							"\xE2\xB0\x98" => "\xE2\xB1\x88", # GLAGOLITIC CAPITAL LETTER HERU
1097							"\xE2\xB0\x99" => "\xE2\xB1\x89", # GLAGOLITIC CAPITAL LETTER OTU
1098							"\xE2\xB0\x9A" => "\xE2\xB1\x8A", # GLAGOLITIC CAPITAL LETTER PE
1099							"\xE2\xB0\x9B" => "\xE2\xB1\x8B", # GLAGOLITIC CAPITAL LETTER SHTA
1100							"\xE2\xB0\x9C" => "\xE2\xB1\x8C", # GLAGOLITIC CAPITAL LETTER TSI
1101							"\xE2\xB0\x9D" => "\xE2\xB1\x8D", # GLAGOLITIC CAPITAL LETTER CHRIVI
1102							"\xE2\xB0\x9E" => "\xE2\xB1\x8E", # GLAGOLITIC CAPITAL LETTER SHA
1103							"\xE2\xB0\x9F" => "\xE2\xB1\x8F", # GLAGOLITIC CAPITAL LETTER YERU
1104							"\xE2\xB0\xA0" => "\xE2\xB1\x90", # GLAGOLITIC CAPITAL LETTER YERI
1105							"\xE2\xB0\xA1" => "\xE2\xB1\x91", # GLAGOLITIC CAPITAL LETTER YATI
1106							"\xE2\xB0\xA2" => "\xE2\xB1\x92", # GLAGOLITIC CAPITAL LETTER SPIDERY HA
1107							"\xE2\xB0\xA3" => "\xE2\xB1\x93", # GLAGOLITIC CAPITAL LETTER YU
1108							"\xE2\xB0\xA4" => "\xE2\xB1\x94", # GLAGOLITIC CAPITAL LETTER SMALL YUS
1109							"\xE2\xB0\xA5" => "\xE2\xB1\x95", # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
1110							"\xE2\xB0\xA6" => "\xE2\xB1\x96", # GLAGOLITIC CAPITAL LETTER YO
1111							"\xE2\xB0\xA7" => "\xE2\xB1\x97", # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
1112							"\xE2\xB0\xA8" => "\xE2\xB1\x98", # GLAGOLITIC CAPITAL LETTER BIG YUS
1113							"\xE2\xB0\xA9" => "\xE2\xB1\x99", # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
1114							"\xE2\xB0\xAA" => "\xE2\xB1\x9A", # GLAGOLITIC CAPITAL LETTER FITA
1115							"\xE2\xB0\xAB" => "\xE2\xB1\x9B", # GLAGOLITIC CAPITAL LETTER IZHITSA
1116							"\xE2\xB0\xAC" => "\xE2\xB1\x9C", # GLAGOLITIC CAPITAL LETTER SHTAPIC
1117							"\xE2\xB0\xAD" => "\xE2\xB1\x9D", # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
1118							"\xE2\xB0\xAE" => "\xE2\xB1\x9E", # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
1119							"\xE2\xB1\xA0" => "\xE2\xB1\xA1", # LATIN CAPITAL LETTER L WITH DOUBLE BAR
1120							"\xE2\xB1\xA2" => "\xC9\xAB", # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
1121							"\xE2\xB1\xA3" => "\xE1\xB5\xBD", # LATIN CAPITAL LETTER P WITH STROKE
1122							"\xE2\xB1\xA4" => "\xC9\xBD", # LATIN CAPITAL LETTER R WITH TAIL
1123							"\xE2\xB1\xA7" => "\xE2\xB1\xA8", # LATIN CAPITAL LETTER H WITH DESCENDER
1124							"\xE2\xB1\xA9" => "\xE2\xB1\xAA", # LATIN CAPITAL LETTER K WITH DESCENDER
1125							"\xE2\xB1\xAB" => "\xE2\xB1\xAC", # LATIN CAPITAL LETTER Z WITH DESCENDER
1126							"\xE2\xB1\xAD" => "\xC9\x91", # LATIN CAPITAL LETTER ALPHA
1127							"\xE2\xB1\xAE" => "\xC9\xB1", # LATIN CAPITAL LETTER M WITH HOOK
1128							"\xE2\xB1\xAF" => "\xC9\x90", # LATIN CAPITAL LETTER TURNED A
1129							"\xE2\xB1\xB0" => "\xC9\x92", # LATIN CAPITAL LETTER TURNED ALPHA
1130							"\xE2\xB1\xB2" => "\xE2\xB1\xB3", # LATIN CAPITAL LETTER W WITH HOOK
1131							"\xE2\xB1\xB5" => "\xE2\xB1\xB6", # LATIN CAPITAL LETTER HALF H
1132							"\xE2\xB1\xBE" => "\xC8\xBF", # LATIN CAPITAL LETTER S WITH SWASH TAIL
1133							"\xE2\xB1\xBF" => "\xC9\x80", # LATIN CAPITAL LETTER Z WITH SWASH TAIL
1134							"\xE2\xB2\x80" => "\xE2\xB2\x81", # COPTIC CAPITAL LETTER ALFA
1135							"\xE2\xB2\x82" => "\xE2\xB2\x83", # COPTIC CAPITAL LETTER VIDA
1136							"\xE2\xB2\x84" => "\xE2\xB2\x85", # COPTIC CAPITAL LETTER GAMMA
1137							"\xE2\xB2\x86" => "\xE2\xB2\x87", # COPTIC CAPITAL LETTER DALDA
1138							"\xE2\xB2\x88" => "\xE2\xB2\x89", # COPTIC CAPITAL LETTER EIE
1139							"\xE2\xB2\x8A" => "\xE2\xB2\x8B", # COPTIC CAPITAL LETTER SOU
1140							"\xE2\xB2\x8C" => "\xE2\xB2\x8D", # COPTIC CAPITAL LETTER ZATA
1141							"\xE2\xB2\x8E" => "\xE2\xB2\x8F", # COPTIC CAPITAL LETTER HATE
1142							"\xE2\xB2\x90" => "\xE2\xB2\x91", # COPTIC CAPITAL LETTER THETHE
1143							"\xE2\xB2\x92" => "\xE2\xB2\x93", # COPTIC CAPITAL LETTER IAUDA
1144							"\xE2\xB2\x94" => "\xE2\xB2\x95", # COPTIC CAPITAL LETTER KAPA
1145							"\xE2\xB2\x96" => "\xE2\xB2\x97", # COPTIC CAPITAL LETTER LAULA
1146							"\xE2\xB2\x98" => "\xE2\xB2\x99", # COPTIC CAPITAL LETTER MI
1147							"\xE2\xB2\x9A" => "\xE2\xB2\x9B", # COPTIC CAPITAL LETTER NI
1148							"\xE2\xB2\x9C" => "\xE2\xB2\x9D", # COPTIC CAPITAL LETTER KSI
1149							"\xE2\xB2\x9E" => "\xE2\xB2\x9F", # COPTIC CAPITAL LETTER O
1150							"\xE2\xB2\xA0" => "\xE2\xB2\xA1", # COPTIC CAPITAL LETTER PI
1151							"\xE2\xB2\xA2" => "\xE2\xB2\xA3", # COPTIC CAPITAL LETTER RO
1152							"\xE2\xB2\xA4" => "\xE2\xB2\xA5", # COPTIC CAPITAL LETTER SIMA
1153							"\xE2\xB2\xA6" => "\xE2\xB2\xA7", # COPTIC CAPITAL LETTER TAU
1154							"\xE2\xB2\xA8" => "\xE2\xB2\xA9", # COPTIC CAPITAL LETTER UA
1155							"\xE2\xB2\xAA" => "\xE2\xB2\xAB", # COPTIC CAPITAL LETTER FI
1156							"\xE2\xB2\xAC" => "\xE2\xB2\xAD", # COPTIC CAPITAL LETTER KHI
1157							"\xE2\xB2\xAE" => "\xE2\xB2\xAF", # COPTIC CAPITAL LETTER PSI
1158							"\xE2\xB2\xB0" => "\xE2\xB2\xB1", # COPTIC CAPITAL LETTER OOU
1159							"\xE2\xB2\xB2" => "\xE2\xB2\xB3", # COPTIC CAPITAL LETTER DIALECT-P ALEF
1160							"\xE2\xB2\xB4" => "\xE2\xB2\xB5", # COPTIC CAPITAL LETTER OLD COPTIC AIN
1161							"\xE2\xB2\xB6" => "\xE2\xB2\xB7", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
1162							"\xE2\xB2\xB8" => "\xE2\xB2\xB9", # COPTIC CAPITAL LETTER DIALECT-P KAPA
1163							"\xE2\xB2\xBA" => "\xE2\xB2\xBB", # COPTIC CAPITAL LETTER DIALECT-P NI
1164							"\xE2\xB2\xBC" => "\xE2\xB2\xBD", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
1165							"\xE2\xB2\xBE" => "\xE2\xB2\xBF", # COPTIC CAPITAL LETTER OLD COPTIC OOU
1166							"\xE2\xB3\x80" => "\xE2\xB3\x81", # COPTIC CAPITAL LETTER SAMPI
1167							"\xE2\xB3\x82" => "\xE2\xB3\x83", # COPTIC CAPITAL LETTER CROSSED SHEI
1168							"\xE2\xB3\x84" => "\xE2\xB3\x85", # COPTIC CAPITAL LETTER OLD COPTIC SHEI
1169							"\xE2\xB3\x86" => "\xE2\xB3\x87", # COPTIC CAPITAL LETTER OLD COPTIC ESH
1170							"\xE2\xB3\x88" => "\xE2\xB3\x89", # COPTIC CAPITAL LETTER AKHMIMIC KHEI
1171							"\xE2\xB3\x8A" => "\xE2\xB3\x8B", # COPTIC CAPITAL LETTER DIALECT-P HORI
1172							"\xE2\xB3\x8C" => "\xE2\xB3\x8D", # COPTIC CAPITAL LETTER OLD COPTIC HORI
1173							"\xE2\xB3\x8E" => "\xE2\xB3\x8F", # COPTIC CAPITAL LETTER OLD COPTIC HA
1174							"\xE2\xB3\x90" => "\xE2\xB3\x91", # COPTIC CAPITAL LETTER L-SHAPED HA
1175							"\xE2\xB3\x92" => "\xE2\xB3\x93", # COPTIC CAPITAL LETTER OLD COPTIC HEI
1176							"\xE2\xB3\x94" => "\xE2\xB3\x95", # COPTIC CAPITAL LETTER OLD COPTIC HAT
1177							"\xE2\xB3\x96" => "\xE2\xB3\x97", # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
1178							"\xE2\xB3\x98" => "\xE2\xB3\x99", # COPTIC CAPITAL LETTER OLD COPTIC DJA
1179							"\xE2\xB3\x9A" => "\xE2\xB3\x9B", # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
1180							"\xE2\xB3\x9C" => "\xE2\xB3\x9D", # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
1181							"\xE2\xB3\x9E" => "\xE2\xB3\x9F", # COPTIC CAPITAL LETTER OLD NUBIAN NGI
1182							"\xE2\xB3\xA0" => "\xE2\xB3\xA1", # COPTIC CAPITAL LETTER OLD NUBIAN NYI
1183							"\xE2\xB3\xA2" => "\xE2\xB3\xA3", # COPTIC CAPITAL LETTER OLD NUBIAN WAU
1184							"\xE2\xB3\xAB" => "\xE2\xB3\xAC", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
1185							"\xE2\xB3\xAD" => "\xE2\xB3\xAE", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
1186							"\xE2\xB3\xB2" => "\xE2\xB3\xB3", # COPTIC CAPITAL LETTER BOHAIRIC KHEI
1187							"\xEA\x99\x80" => "\xEA\x99\x81", # CYRILLIC CAPITAL LETTER ZEMLYA
1188							"\xEA\x99\x82" => "\xEA\x99\x83", # CYRILLIC CAPITAL LETTER DZELO
1189							"\xEA\x99\x84" => "\xEA\x99\x85", # CYRILLIC CAPITAL LETTER REVERSED DZE
1190							"\xEA\x99\x86" => "\xEA\x99\x87", # CYRILLIC CAPITAL LETTER IOTA
1191							"\xEA\x99\x88" => "\xEA\x99\x89", # CYRILLIC CAPITAL LETTER DJERV
1192							"\xEA\x99\x8A" => "\xEA\x99\x8B", # CYRILLIC CAPITAL LETTER MONOGRAPH UK
1193							"\xEA\x99\x8C" => "\xEA\x99\x8D", # CYRILLIC CAPITAL LETTER BROAD OMEGA
1194							"\xEA\x99\x8E" => "\xEA\x99\x8F", # CYRILLIC CAPITAL LETTER NEUTRAL YER
1195							"\xEA\x99\x90" => "\xEA\x99\x91", # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
1196							"\xEA\x99\x92" => "\xEA\x99\x93", # CYRILLIC CAPITAL LETTER IOTIFIED YAT
1197							"\xEA\x99\x94" => "\xEA\x99\x95", # CYRILLIC CAPITAL LETTER REVERSED YU
1198							"\xEA\x99\x96" => "\xEA\x99\x97", # CYRILLIC CAPITAL LETTER IOTIFIED A
1199							"\xEA\x99\x98" => "\xEA\x99\x99", # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
1200							"\xEA\x99\x9A" => "\xEA\x99\x9B", # CYRILLIC CAPITAL LETTER BLENDED YUS
1201							"\xEA\x99\x9C" => "\xEA\x99\x9D", # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
1202							"\xEA\x99\x9E" => "\xEA\x99\x9F", # CYRILLIC CAPITAL LETTER YN
1203							"\xEA\x99\xA0" => "\xEA\x99\xA1", # CYRILLIC CAPITAL LETTER REVERSED TSE
1204							"\xEA\x99\xA2" => "\xEA\x99\xA3", # CYRILLIC CAPITAL LETTER SOFT DE
1205							"\xEA\x99\xA4" => "\xEA\x99\xA5", # CYRILLIC CAPITAL LETTER SOFT EL
1206							"\xEA\x99\xA6" => "\xEA\x99\xA7", # CYRILLIC CAPITAL LETTER SOFT EM
1207							"\xEA\x99\xA8" => "\xEA\x99\xA9", # CYRILLIC CAPITAL LETTER MONOCULAR O
1208							"\xEA\x99\xAA" => "\xEA\x99\xAB", # CYRILLIC CAPITAL LETTER BINOCULAR O
1209							"\xEA\x99\xAC" => "\xEA\x99\xAD", # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
1210							"\xEA\x9A\x80" => "\xEA\x9A\x81", # CYRILLIC CAPITAL LETTER DWE
1211							"\xEA\x9A\x82" => "\xEA\x9A\x83", # CYRILLIC CAPITAL LETTER DZWE
1212							"\xEA\x9A\x84" => "\xEA\x9A\x85", # CYRILLIC CAPITAL LETTER ZHWE
1213							"\xEA\x9A\x86" => "\xEA\x9A\x87", # CYRILLIC CAPITAL LETTER CCHE
1214							"\xEA\x9A\x88" => "\xEA\x9A\x89", # CYRILLIC CAPITAL LETTER DZZE
1215							"\xEA\x9A\x8A" => "\xEA\x9A\x8B", # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
1216							"\xEA\x9A\x8C" => "\xEA\x9A\x8D", # CYRILLIC CAPITAL LETTER TWE
1217							"\xEA\x9A\x8E" => "\xEA\x9A\x8F", # CYRILLIC CAPITAL LETTER TSWE
1218							"\xEA\x9A\x90" => "\xEA\x9A\x91", # CYRILLIC CAPITAL LETTER TSSE
1219							"\xEA\x9A\x92" => "\xEA\x9A\x93", # CYRILLIC CAPITAL LETTER TCHE
1220							"\xEA\x9A\x94" => "\xEA\x9A\x95", # CYRILLIC CAPITAL LETTER HWE
1221							"\xEA\x9A\x96" => "\xEA\x9A\x97", # CYRILLIC CAPITAL LETTER SHWE
1222							"\xEA\x9A\x98" => "\xEA\x9A\x99", # CYRILLIC CAPITAL LETTER DOUBLE O
1223							"\xEA\x9A\x9A" => "\xEA\x9A\x9B", # CYRILLIC CAPITAL LETTER CROSSED O
1224							"\xEA\x9C\xA2" => "\xEA\x9C\xA3", # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
1225							"\xEA\x9C\xA4" => "\xEA\x9C\xA5", # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
1226							"\xEA\x9C\xA6" => "\xEA\x9C\xA7", # LATIN CAPITAL LETTER HENG
1227							"\xEA\x9C\xA8" => "\xEA\x9C\xA9", # LATIN CAPITAL LETTER TZ
1228							"\xEA\x9C\xAA" => "\xEA\x9C\xAB", # LATIN CAPITAL LETTER TRESILLO
1229							"\xEA\x9C\xAC" => "\xEA\x9C\xAD", # LATIN CAPITAL LETTER CUATRILLO
1230							"\xEA\x9C\xAE" => "\xEA\x9C\xAF", # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
1231							"\xEA\x9C\xB2" => "\xEA\x9C\xB3", # LATIN CAPITAL LETTER AA
1232							"\xEA\x9C\xB4" => "\xEA\x9C\xB5", # LATIN CAPITAL LETTER AO
1233							"\xEA\x9C\xB6" => "\xEA\x9C\xB7", # LATIN CAPITAL LETTER AU
1234							"\xEA\x9C\xB8" => "\xEA\x9C\xB9", # LATIN CAPITAL LETTER AV
1235							"\xEA\x9C\xBA" => "\xEA\x9C\xBB", # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
1236							"\xEA\x9C\xBC" => "\xEA\x9C\xBD", # LATIN CAPITAL LETTER AY
1237							"\xEA\x9C\xBE" => "\xEA\x9C\xBF", # LATIN CAPITAL LETTER REVERSED C WITH DOT
1238							"\xEA\x9D\x80" => "\xEA\x9D\x81", # LATIN CAPITAL LETTER K WITH STROKE
1239							"\xEA\x9D\x82" => "\xEA\x9D\x83", # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
1240							"\xEA\x9D\x84" => "\xEA\x9D\x85", # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
1241							"\xEA\x9D\x86" => "\xEA\x9D\x87", # LATIN CAPITAL LETTER BROKEN L
1242							"\xEA\x9D\x88" => "\xEA\x9D\x89", # LATIN CAPITAL LETTER L WITH HIGH STROKE
1243							"\xEA\x9D\x8A" => "\xEA\x9D\x8B", # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
1244							"\xEA\x9D\x8C" => "\xEA\x9D\x8D", # LATIN CAPITAL LETTER O WITH LOOP
1245							"\xEA\x9D\x8E" => "\xEA\x9D\x8F", # LATIN CAPITAL LETTER OO
1246							"\xEA\x9D\x90" => "\xEA\x9D\x91", # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
1247							"\xEA\x9D\x92" => "\xEA\x9D\x93", # LATIN CAPITAL LETTER P WITH FLOURISH
1248							"\xEA\x9D\x94" => "\xEA\x9D\x95", # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
1249							"\xEA\x9D\x96" => "\xEA\x9D\x97", # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
1250							"\xEA\x9D\x98" => "\xEA\x9D\x99", # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
1251							"\xEA\x9D\x9A" => "\xEA\x9D\x9B", # LATIN CAPITAL LETTER R ROTUNDA
1252							"\xEA\x9D\x9C" => "\xEA\x9D\x9D", # LATIN CAPITAL LETTER RUM ROTUNDA
1253							"\xEA\x9D\x9E" => "\xEA\x9D\x9F", # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
1254							"\xEA\x9D\xA0" => "\xEA\x9D\xA1", # LATIN CAPITAL LETTER VY
1255							"\xEA\x9D\xA2" => "\xEA\x9D\xA3", # LATIN CAPITAL LETTER VISIGOTHIC Z
1256							"\xEA\x9D\xA4" => "\xEA\x9D\xA5", # LATIN CAPITAL LETTER THORN WITH STROKE
1257							"\xEA\x9D\xA6" => "\xEA\x9D\xA7", # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
1258							"\xEA\x9D\xA8" => "\xEA\x9D\xA9", # LATIN CAPITAL LETTER VEND
1259							"\xEA\x9D\xAA" => "\xEA\x9D\xAB", # LATIN CAPITAL LETTER ET
1260							"\xEA\x9D\xAC" => "\xEA\x9D\xAD", # LATIN CAPITAL LETTER IS
1261							"\xEA\x9D\xAE" => "\xEA\x9D\xAF", # LATIN CAPITAL LETTER CON
1262							"\xEA\x9D\xB9" => "\xEA\x9D\xBA", # LATIN CAPITAL LETTER INSULAR D
1263							"\xEA\x9D\xBB" => "\xEA\x9D\xBC", # LATIN CAPITAL LETTER INSULAR F
1264							"\xEA\x9D\xBD" => "\xE1\xB5\xB9", # LATIN CAPITAL LETTER INSULAR G
1265							"\xEA\x9D\xBE" => "\xEA\x9D\xBF", # LATIN CAPITAL LETTER TURNED INSULAR G
1266							"\xEA\x9E\x80" => "\xEA\x9E\x81", # LATIN CAPITAL LETTER TURNED L
1267							"\xEA\x9E\x82" => "\xEA\x9E\x83", # LATIN CAPITAL LETTER INSULAR R
1268							"\xEA\x9E\x84" => "\xEA\x9E\x85", # LATIN CAPITAL LETTER INSULAR S
1269							"\xEA\x9E\x86" => "\xEA\x9E\x87", # LATIN CAPITAL LETTER INSULAR T
1270							"\xEA\x9E\x8B" => "\xEA\x9E\x8C", # LATIN CAPITAL LETTER SALTILLO
1271							"\xEA\x9E\x8D" => "\xC9\xA5", # LATIN CAPITAL LETTER TURNED H
1272							"\xEA\x9E\x90" => "\xEA\x9E\x91", # LATIN CAPITAL LETTER N WITH DESCENDER
1273							"\xEA\x9E\x92" => "\xEA\x9E\x93", # LATIN CAPITAL LETTER C WITH BAR
1274							"\xEA\x9E\x96" => "\xEA\x9E\x97", # LATIN CAPITAL LETTER B WITH FLOURISH
1275							"\xEA\x9E\x98" => "\xEA\x9E\x99", # LATIN CAPITAL LETTER F WITH STROKE
1276							"\xEA\x9E\x9A" => "\xEA\x9E\x9B", # LATIN CAPITAL LETTER VOLAPUK AE
1277							"\xEA\x9E\x9C" => "\xEA\x9E\x9D", # LATIN CAPITAL LETTER VOLAPUK OE
1278							"\xEA\x9E\x9E" => "\xEA\x9E\x9F", # LATIN CAPITAL LETTER VOLAPUK UE
1279							"\xEA\x9E\xA0" => "\xEA\x9E\xA1", # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
1280							"\xEA\x9E\xA2" => "\xEA\x9E\xA3", # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
1281							"\xEA\x9E\xA4" => "\xEA\x9E\xA5", # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
1282							"\xEA\x9E\xA6" => "\xEA\x9E\xA7", # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
1283							"\xEA\x9E\xA8" => "\xEA\x9E\xA9", # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
1284							"\xEA\x9E\xAA" => "\xC9\xA6", # LATIN CAPITAL LETTER H WITH HOOK
1285							"\xEA\x9E\xAB" => "\xC9\x9C", # LATIN CAPITAL LETTER REVERSED OPEN E
1286							"\xEA\x9E\xAC" => "\xC9\xA1", # LATIN CAPITAL LETTER SCRIPT G
1287							"\xEA\x9E\xAD" => "\xC9\xAC", # LATIN CAPITAL LETTER L WITH BELT
1288							"\xEA\x9E\xAE" => "\xC9\xAA", # LATIN CAPITAL LETTER SMALL CAPITAL I
1289							"\xEA\x9E\xB0" => "\xCA\x9E", # LATIN CAPITAL LETTER TURNED K
1290							"\xEA\x9E\xB1" => "\xCA\x87", # LATIN CAPITAL LETTER TURNED T
1291							"\xEA\x9E\xB2" => "\xCA\x9D", # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
1292							"\xEA\x9E\xB3" => "\xEA\xAD\x93", # LATIN CAPITAL LETTER CHI
1293							"\xEA\x9E\xB4" => "\xEA\x9E\xB5", # LATIN CAPITAL LETTER BETA
1294							"\xEA\x9E\xB6" => "\xEA\x9E\xB7", # LATIN CAPITAL LETTER OMEGA
1295							"\xEA\xAD\xB0" => "\xE1\x8E\xA0", # CHEROKEE SMALL LETTER A
1296							"\xEA\xAD\xB1" => "\xE1\x8E\xA1", # CHEROKEE SMALL LETTER E
1297							"\xEA\xAD\xB2" => "\xE1\x8E\xA2", # CHEROKEE SMALL LETTER I
1298							"\xEA\xAD\xB3" => "\xE1\x8E\xA3", # CHEROKEE SMALL LETTER O
1299							"\xEA\xAD\xB4" => "\xE1\x8E\xA4", # CHEROKEE SMALL LETTER U
1300							"\xEA\xAD\xB5" => "\xE1\x8E\xA5", # CHEROKEE SMALL LETTER V
1301							"\xEA\xAD\xB6" => "\xE1\x8E\xA6", # CHEROKEE SMALL LETTER GA
1302							"\xEA\xAD\xB7" => "\xE1\x8E\xA7", # CHEROKEE SMALL LETTER KA
1303							"\xEA\xAD\xB8" => "\xE1\x8E\xA8", # CHEROKEE SMALL LETTER GE
1304							"\xEA\xAD\xB9" => "\xE1\x8E\xA9", # CHEROKEE SMALL LETTER GI
1305							"\xEA\xAD\xBA" => "\xE1\x8E\xAA", # CHEROKEE SMALL LETTER GO
1306							"\xEA\xAD\xBB" => "\xE1\x8E\xAB", # CHEROKEE SMALL LETTER GU
1307							"\xEA\xAD\xBC" => "\xE1\x8E\xAC", # CHEROKEE SMALL LETTER GV
1308							"\xEA\xAD\xBD" => "\xE1\x8E\xAD", # CHEROKEE SMALL LETTER HA
1309							"\xEA\xAD\xBE" => "\xE1\x8E\xAE", # CHEROKEE SMALL LETTER HE
1310							"\xEA\xAD\xBF" => "\xE1\x8E\xAF", # CHEROKEE SMALL LETTER HI
1311							"\xEA\xAE\x80" => "\xE1\x8E\xB0", # CHEROKEE SMALL LETTER HO
1312							"\xEA\xAE\x81" => "\xE1\x8E\xB1", # CHEROKEE SMALL LETTER HU
1313							"\xEA\xAE\x82" => "\xE1\x8E\xB2", # CHEROKEE SMALL LETTER HV
1314							"\xEA\xAE\x83" => "\xE1\x8E\xB3", # CHEROKEE SMALL LETTER LA
1315							"\xEA\xAE\x84" => "\xE1\x8E\xB4", # CHEROKEE SMALL LETTER LE
1316							"\xEA\xAE\x85" => "\xE1\x8E\xB5", # CHEROKEE SMALL LETTER LI
1317							"\xEA\xAE\x86" => "\xE1\x8E\xB6", # CHEROKEE SMALL LETTER LO
1318							"\xEA\xAE\x87" => "\xE1\x8E\xB7", # CHEROKEE SMALL LETTER LU
1319							"\xEA\xAE\x88" => "\xE1\x8E\xB8", # CHEROKEE SMALL LETTER LV
1320							"\xEA\xAE\x89" => "\xE1\x8E\xB9", # CHEROKEE SMALL LETTER MA
1321							"\xEA\xAE\x8A" => "\xE1\x8E\xBA", # CHEROKEE SMALL LETTER ME
1322							"\xEA\xAE\x8B" => "\xE1\x8E\xBB", # CHEROKEE SMALL LETTER MI
1323							"\xEA\xAE\x8C" => "\xE1\x8E\xBC", # CHEROKEE SMALL LETTER MO
1324							"\xEA\xAE\x8D" => "\xE1\x8E\xBD", # CHEROKEE SMALL LETTER MU
1325							"\xEA\xAE\x8E" => "\xE1\x8E\xBE", # CHEROKEE SMALL LETTER NA
1326							"\xEA\xAE\x8F" => "\xE1\x8E\xBF", # CHEROKEE SMALL LETTER HNA
1327							"\xEA\xAE\x90" => "\xE1\x8F\x80", # CHEROKEE SMALL LETTER NAH
1328							"\xEA\xAE\x91" => "\xE1\x8F\x81", # CHEROKEE SMALL LETTER NE
1329							"\xEA\xAE\x92" => "\xE1\x8F\x82", # CHEROKEE SMALL LETTER NI
1330							"\xEA\xAE\x93" => "\xE1\x8F\x83", # CHEROKEE SMALL LETTER NO
1331							"\xEA\xAE\x94" => "\xE1\x8F\x84", # CHEROKEE SMALL LETTER NU
1332							"\xEA\xAE\x95" => "\xE1\x8F\x85", # CHEROKEE SMALL LETTER NV
1333							"\xEA\xAE\x96" => "\xE1\x8F\x86", # CHEROKEE SMALL LETTER QUA
1334							"\xEA\xAE\x97" => "\xE1\x8F\x87", # CHEROKEE SMALL LETTER QUE
1335							"\xEA\xAE\x98" => "\xE1\x8F\x88", # CHEROKEE SMALL LETTER QUI
1336							"\xEA\xAE\x99" => "\xE1\x8F\x89", # CHEROKEE SMALL LETTER QUO
1337							"\xEA\xAE\x9A" => "\xE1\x8F\x8A", # CHEROKEE SMALL LETTER QUU
1338							"\xEA\xAE\x9B" => "\xE1\x8F\x8B", # CHEROKEE SMALL LETTER QUV
1339							"\xEA\xAE\x9C" => "\xE1\x8F\x8C", # CHEROKEE SMALL LETTER SA
1340							"\xEA\xAE\x9D" => "\xE1\x8F\x8D", # CHEROKEE SMALL LETTER S
1341							"\xEA\xAE\x9E" => "\xE1\x8F\x8E", # CHEROKEE SMALL LETTER SE
1342							"\xEA\xAE\x9F" => "\xE1\x8F\x8F", # CHEROKEE SMALL LETTER SI
1343							"\xEA\xAE\xA0" => "\xE1\x8F\x90", # CHEROKEE SMALL LETTER SO
1344							"\xEA\xAE\xA1" => "\xE1\x8F\x91", # CHEROKEE SMALL LETTER SU
1345							"\xEA\xAE\xA2" => "\xE1\x8F\x92", # CHEROKEE SMALL LETTER SV
1346							"\xEA\xAE\xA3" => "\xE1\x8F\x93", # CHEROKEE SMALL LETTER DA
1347							"\xEA\xAE\xA4" => "\xE1\x8F\x94", # CHEROKEE SMALL LETTER TA
1348							"\xEA\xAE\xA5" => "\xE1\x8F\x95", # CHEROKEE SMALL LETTER DE
1349							"\xEA\xAE\xA6" => "\xE1\x8F\x96", # CHEROKEE SMALL LETTER TE
1350							"\xEA\xAE\xA7" => "\xE1\x8F\x97", # CHEROKEE SMALL LETTER DI
1351							"\xEA\xAE\xA8" => "\xE1\x8F\x98", # CHEROKEE SMALL LETTER TI
1352							"\xEA\xAE\xA9" => "\xE1\x8F\x99", # CHEROKEE SMALL LETTER DO
1353							"\xEA\xAE\xAA" => "\xE1\x8F\x9A", # CHEROKEE SMALL LETTER DU
1354							"\xEA\xAE\xAB" => "\xE1\x8F\x9B", # CHEROKEE SMALL LETTER DV
1355							"\xEA\xAE\xAC" => "\xE1\x8F\x9C", # CHEROKEE SMALL LETTER DLA
1356							"\xEA\xAE\xAD" => "\xE1\x8F\x9D", # CHEROKEE SMALL LETTER TLA
1357							"\xEA\xAE\xAE" => "\xE1\x8F\x9E", # CHEROKEE SMALL LETTER TLE
1358							"\xEA\xAE\xAF" => "\xE1\x8F\x9F", # CHEROKEE SMALL LETTER TLI
1359							"\xEA\xAE\xB0" => "\xE1\x8F\xA0", # CHEROKEE SMALL LETTER TLO
1360							"\xEA\xAE\xB1" => "\xE1\x8F\xA1", # CHEROKEE SMALL LETTER TLU
1361							"\xEA\xAE\xB2" => "\xE1\x8F\xA2", # CHEROKEE SMALL LETTER TLV
1362							"\xEA\xAE\xB3" => "\xE1\x8F\xA3", # CHEROKEE SMALL LETTER TSA
1363							"\xEA\xAE\xB4" => "\xE1\x8F\xA4", # CHEROKEE SMALL LETTER TSE
1364							"\xEA\xAE\xB5" => "\xE1\x8F\xA5", # CHEROKEE SMALL LETTER TSI
1365							"\xEA\xAE\xB6" => "\xE1\x8F\xA6", # CHEROKEE SMALL LETTER TSO
1366							"\xEA\xAE\xB7" => "\xE1\x8F\xA7", # CHEROKEE SMALL LETTER TSU
1367							"\xEA\xAE\xB8" => "\xE1\x8F\xA8", # CHEROKEE SMALL LETTER TSV
1368							"\xEA\xAE\xB9" => "\xE1\x8F\xA9", # CHEROKEE SMALL LETTER WA
1369							"\xEA\xAE\xBA" => "\xE1\x8F\xAA", # CHEROKEE SMALL LETTER WE
1370							"\xEA\xAE\xBB" => "\xE1\x8F\xAB", # CHEROKEE SMALL LETTER WI
1371							"\xEA\xAE\xBC" => "\xE1\x8F\xAC", # CHEROKEE SMALL LETTER WO
1372							"\xEA\xAE\xBD" => "\xE1\x8F\xAD", # CHEROKEE SMALL LETTER WU
1373							"\xEA\xAE\xBE" => "\xE1\x8F\xAE", # CHEROKEE SMALL LETTER WV
1374							"\xEA\xAE\xBF" => "\xE1\x8F\xAF", # CHEROKEE SMALL LETTER YA
1375							"\xEF\xAC\x80" => "\x66\x66", # LATIN SMALL LIGATURE FF
1376							"\xEF\xAC\x81" => "\x66\x69", # LATIN SMALL LIGATURE FI
1377							"\xEF\xAC\x82" => "\x66\x6C", # LATIN SMALL LIGATURE FL
1378							"\xEF\xAC\x83" => "\x66\x66\x69", # LATIN SMALL LIGATURE FFI
1379							"\xEF\xAC\x84" => "\x66\x66\x6C", # LATIN SMALL LIGATURE FFL
1380							"\xEF\xAC\x85" => "\x73\x74", # LATIN SMALL LIGATURE LONG S T
1381							"\xEF\xAC\x86" => "\x73\x74", # LATIN SMALL LIGATURE ST
1382							"\xEF\xAC\x93" => "\xD5\xB4\xD5\xB6", # ARMENIAN SMALL LIGATURE MEN NOW
1383							"\xEF\xAC\x94" => "\xD5\xB4\xD5\xA5", # ARMENIAN SMALL LIGATURE MEN ECH
1384							"\xEF\xAC\x95" => "\xD5\xB4\xD5\xAB", # ARMENIAN SMALL LIGATURE MEN INI
1385							"\xEF\xAC\x96" => "\xD5\xBE\xD5\xB6", # ARMENIAN SMALL LIGATURE VEW NOW
1386							"\xEF\xAC\x97" => "\xD5\xB4\xD5\xAD", # ARMENIAN SMALL LIGATURE MEN XEH
1387							"\xEF\xBC\xA1" => "\xEF\xBD\x81", # FULLWIDTH LATIN CAPITAL LETTER A
1388							"\xEF\xBC\xA2" => "\xEF\xBD\x82", # FULLWIDTH LATIN CAPITAL LETTER B
1389							"\xEF\xBC\xA3" => "\xEF\xBD\x83", # FULLWIDTH LATIN CAPITAL LETTER C
1390							"\xEF\xBC\xA4" => "\xEF\xBD\x84", # FULLWIDTH LATIN CAPITAL LETTER D
1391							"\xEF\xBC\xA5" => "\xEF\xBD\x85", # FULLWIDTH LATIN CAPITAL LETTER E
1392							"\xEF\xBC\xA6" => "\xEF\xBD\x86", # FULLWIDTH LATIN CAPITAL LETTER F
1393							"\xEF\xBC\xA7" => "\xEF\xBD\x87", # FULLWIDTH LATIN CAPITAL LETTER G
1394							"\xEF\xBC\xA8" => "\xEF\xBD\x88", # FULLWIDTH LATIN CAPITAL LETTER H
1395							"\xEF\xBC\xA9" => "\xEF\xBD\x89", # FULLWIDTH LATIN CAPITAL LETTER I
1396							"\xEF\xBC\xAA" => "\xEF\xBD\x8A", # FULLWIDTH LATIN CAPITAL LETTER J
1397							"\xEF\xBC\xAB" => "\xEF\xBD\x8B", # FULLWIDTH LATIN CAPITAL LETTER K
1398							"\xEF\xBC\xAC" => "\xEF\xBD\x8C", # FULLWIDTH LATIN CAPITAL LETTER L
1399							"\xEF\xBC\xAD" => "\xEF\xBD\x8D", # FULLWIDTH LATIN CAPITAL LETTER M
1400							"\xEF\xBC\xAE" => "\xEF\xBD\x8E", # FULLWIDTH LATIN CAPITAL LETTER N
1401							"\xEF\xBC\xAF" => "\xEF\xBD\x8F", # FULLWIDTH LATIN CAPITAL LETTER O
1402							"\xEF\xBC\xB0" => "\xEF\xBD\x90", # FULLWIDTH LATIN CAPITAL LETTER P
1403							"\xEF\xBC\xB1" => "\xEF\xBD\x91", # FULLWIDTH LATIN CAPITAL LETTER Q
1404							"\xEF\xBC\xB2" => "\xEF\xBD\x92", # FULLWIDTH LATIN CAPITAL LETTER R
1405							"\xEF\xBC\xB3" => "\xEF\xBD\x93", # FULLWIDTH LATIN CAPITAL LETTER S
1406							"\xEF\xBC\xB4" => "\xEF\xBD\x94", # FULLWIDTH LATIN CAPITAL LETTER T
1407							"\xEF\xBC\xB5" => "\xEF\xBD\x95", # FULLWIDTH LATIN CAPITAL LETTER U
1408							"\xEF\xBC\xB6" => "\xEF\xBD\x96", # FULLWIDTH LATIN CAPITAL LETTER V
1409							"\xEF\xBC\xB7" => "\xEF\xBD\x97", # FULLWIDTH LATIN CAPITAL LETTER W
1410							"\xEF\xBC\xB8" => "\xEF\xBD\x98", # FULLWIDTH LATIN CAPITAL LETTER X
1411							"\xEF\xBC\xB9" => "\xEF\xBD\x99", # FULLWIDTH LATIN CAPITAL LETTER Y
1412							"\xEF\xBC\xBA" => "\xEF\xBD\x9A", # FULLWIDTH LATIN CAPITAL LETTER Z
1413							"\xF0\x90\x90\x80" => "\xF0\x90\x90\xA8", # DESERET CAPITAL LETTER LONG I
1414							"\xF0\x90\x90\x81" => "\xF0\x90\x90\xA9", # DESERET CAPITAL LETTER LONG E
1415							"\xF0\x90\x90\x82" => "\xF0\x90\x90\xAA", # DESERET CAPITAL LETTER LONG A
1416							"\xF0\x90\x90\x83" => "\xF0\x90\x90\xAB", # DESERET CAPITAL LETTER LONG AH
1417							"\xF0\x90\x90\x84" => "\xF0\x90\x90\xAC", # DESERET CAPITAL LETTER LONG O
1418							"\xF0\x90\x90\x85" => "\xF0\x90\x90\xAD", # DESERET CAPITAL LETTER LONG OO
1419							"\xF0\x90\x90\x86" => "\xF0\x90\x90\xAE", # DESERET CAPITAL LETTER SHORT I
1420							"\xF0\x90\x90\x87" => "\xF0\x90\x90\xAF", # DESERET CAPITAL LETTER SHORT E
1421							"\xF0\x90\x90\x88" => "\xF0\x90\x90\xB0", # DESERET CAPITAL LETTER SHORT A
1422							"\xF0\x90\x90\x89" => "\xF0\x90\x90\xB1", # DESERET CAPITAL LETTER SHORT AH
1423							"\xF0\x90\x90\x8A" => "\xF0\x90\x90\xB2", # DESERET CAPITAL LETTER SHORT O
1424							"\xF0\x90\x90\x8B" => "\xF0\x90\x90\xB3", # DESERET CAPITAL LETTER SHORT OO
1425							"\xF0\x90\x90\x8C" => "\xF0\x90\x90\xB4", # DESERET CAPITAL LETTER AY
1426							"\xF0\x90\x90\x8D" => "\xF0\x90\x90\xB5", # DESERET CAPITAL LETTER OW
1427							"\xF0\x90\x90\x8E" => "\xF0\x90\x90\xB6", # DESERET CAPITAL LETTER WU
1428							"\xF0\x90\x90\x8F" => "\xF0\x90\x90\xB7", # DESERET CAPITAL LETTER YEE
1429							"\xF0\x90\x90\x90" => "\xF0\x90\x90\xB8", # DESERET CAPITAL LETTER H
1430							"\xF0\x90\x90\x91" => "\xF0\x90\x90\xB9", # DESERET CAPITAL LETTER PEE
1431							"\xF0\x90\x90\x92" => "\xF0\x90\x90\xBA", # DESERET CAPITAL LETTER BEE
1432							"\xF0\x90\x90\x93" => "\xF0\x90\x90\xBB", # DESERET CAPITAL LETTER TEE
1433							"\xF0\x90\x90\x94" => "\xF0\x90\x90\xBC", # DESERET CAPITAL LETTER DEE
1434							"\xF0\x90\x90\x95" => "\xF0\x90\x90\xBD", # DESERET CAPITAL LETTER CHEE
1435							"\xF0\x90\x90\x96" => "\xF0\x90\x90\xBE", # DESERET CAPITAL LETTER JEE
1436							"\xF0\x90\x90\x97" => "\xF0\x90\x90\xBF", # DESERET CAPITAL LETTER KAY
1437							"\xF0\x90\x90\x98" => "\xF0\x90\x91\x80", # DESERET CAPITAL LETTER GAY
1438							"\xF0\x90\x90\x99" => "\xF0\x90\x91\x81", # DESERET CAPITAL LETTER EF
1439							"\xF0\x90\x90\x9A" => "\xF0\x90\x91\x82", # DESERET CAPITAL LETTER VEE
1440							"\xF0\x90\x90\x9B" => "\xF0\x90\x91\x83", # DESERET CAPITAL LETTER ETH
1441							"\xF0\x90\x90\x9C" => "\xF0\x90\x91\x84", # DESERET CAPITAL LETTER THEE
1442							"\xF0\x90\x90\x9D" => "\xF0\x90\x91\x85", # DESERET CAPITAL LETTER ES
1443							"\xF0\x90\x90\x9E" => "\xF0\x90\x91\x86", # DESERET CAPITAL LETTER ZEE
1444							"\xF0\x90\x90\x9F" => "\xF0\x90\x91\x87", # DESERET CAPITAL LETTER ESH
1445							"\xF0\x90\x90\xA0" => "\xF0\x90\x91\x88", # DESERET CAPITAL LETTER ZHEE
1446							"\xF0\x90\x90\xA1" => "\xF0\x90\x91\x89", # DESERET CAPITAL LETTER ER
1447							"\xF0\x90\x90\xA2" => "\xF0\x90\x91\x8A", # DESERET CAPITAL LETTER EL
1448							"\xF0\x90\x90\xA3" => "\xF0\x90\x91\x8B", # DESERET CAPITAL LETTER EM
1449							"\xF0\x90\x90\xA4" => "\xF0\x90\x91\x8C", # DESERET CAPITAL LETTER EN
1450							"\xF0\x90\x90\xA5" => "\xF0\x90\x91\x8D", # DESERET CAPITAL LETTER ENG
1451							"\xF0\x90\x90\xA6" => "\xF0\x90\x91\x8E", # DESERET CAPITAL LETTER OI
1452							"\xF0\x90\x90\xA7" => "\xF0\x90\x91\x8F", # DESERET CAPITAL LETTER EW
1453							"\xF0\x90\x92\xB0" => "\xF0\x90\x93\x98", # OSAGE CAPITAL LETTER A
1454							"\xF0\x90\x92\xB1" => "\xF0\x90\x93\x99", # OSAGE CAPITAL LETTER AI
1455							"\xF0\x90\x92\xB2" => "\xF0\x90\x93\x9A", # OSAGE CAPITAL LETTER AIN
1456							"\xF0\x90\x92\xB3" => "\xF0\x90\x93\x9B", # OSAGE CAPITAL LETTER AH
1457							"\xF0\x90\x92\xB4" => "\xF0\x90\x93\x9C", # OSAGE CAPITAL LETTER BRA
1458							"\xF0\x90\x92\xB5" => "\xF0\x90\x93\x9D", # OSAGE CAPITAL LETTER CHA
1459							"\xF0\x90\x92\xB6" => "\xF0\x90\x93\x9E", # OSAGE CAPITAL LETTER EHCHA
1460							"\xF0\x90\x92\xB7" => "\xF0\x90\x93\x9F", # OSAGE CAPITAL LETTER E
1461							"\xF0\x90\x92\xB8" => "\xF0\x90\x93\xA0", # OSAGE CAPITAL LETTER EIN
1462							"\xF0\x90\x92\xB9" => "\xF0\x90\x93\xA1", # OSAGE CAPITAL LETTER HA
1463							"\xF0\x90\x92\xBA" => "\xF0\x90\x93\xA2", # OSAGE CAPITAL LETTER HYA
1464							"\xF0\x90\x92\xBB" => "\xF0\x90\x93\xA3", # OSAGE CAPITAL LETTER I
1465							"\xF0\x90\x92\xBC" => "\xF0\x90\x93\xA4", # OSAGE CAPITAL LETTER KA
1466							"\xF0\x90\x92\xBD" => "\xF0\x90\x93\xA5", # OSAGE CAPITAL LETTER EHKA
1467							"\xF0\x90\x92\xBE" => "\xF0\x90\x93\xA6", # OSAGE CAPITAL LETTER KYA
1468							"\xF0\x90\x92\xBF" => "\xF0\x90\x93\xA7", # OSAGE CAPITAL LETTER LA
1469							"\xF0\x90\x93\x80" => "\xF0\x90\x93\xA8", # OSAGE CAPITAL LETTER MA
1470							"\xF0\x90\x93\x81" => "\xF0\x90\x93\xA9", # OSAGE CAPITAL LETTER NA
1471							"\xF0\x90\x93\x82" => "\xF0\x90\x93\xAA", # OSAGE CAPITAL LETTER O
1472							"\xF0\x90\x93\x83" => "\xF0\x90\x93\xAB", # OSAGE CAPITAL LETTER OIN
1473							"\xF0\x90\x93\x84" => "\xF0\x90\x93\xAC", # OSAGE CAPITAL LETTER PA
1474							"\xF0\x90\x93\x85" => "\xF0\x90\x93\xAD", # OSAGE CAPITAL LETTER EHPA
1475							"\xF0\x90\x93\x86" => "\xF0\x90\x93\xAE", # OSAGE CAPITAL LETTER SA
1476							"\xF0\x90\x93\x87" => "\xF0\x90\x93\xAF", # OSAGE CAPITAL LETTER SHA
1477							"\xF0\x90\x93\x88" => "\xF0\x90\x93\xB0", # OSAGE CAPITAL LETTER TA
1478							"\xF0\x90\x93\x89" => "\xF0\x90\x93\xB1", # OSAGE CAPITAL LETTER EHTA
1479							"\xF0\x90\x93\x8A" => "\xF0\x90\x93\xB2", # OSAGE CAPITAL LETTER TSA
1480							"\xF0\x90\x93\x8B" => "\xF0\x90\x93\xB3", # OSAGE CAPITAL LETTER EHTSA
1481							"\xF0\x90\x93\x8C" => "\xF0\x90\x93\xB4", # OSAGE CAPITAL LETTER TSHA
1482							"\xF0\x90\x93\x8D" => "\xF0\x90\x93\xB5", # OSAGE CAPITAL LETTER DHA
1483							"\xF0\x90\x93\x8E" => "\xF0\x90\x93\xB6", # OSAGE CAPITAL LETTER U
1484							"\xF0\x90\x93\x8F" => "\xF0\x90\x93\xB7", # OSAGE CAPITAL LETTER WA
1485							"\xF0\x90\x93\x90" => "\xF0\x90\x93\xB8", # OSAGE CAPITAL LETTER KHA
1486							"\xF0\x90\x93\x91" => "\xF0\x90\x93\xB9", # OSAGE CAPITAL LETTER GHA
1487							"\xF0\x90\x93\x92" => "\xF0\x90\x93\xBA", # OSAGE CAPITAL LETTER ZA
1488							"\xF0\x90\x93\x93" => "\xF0\x90\x93\xBB", # OSAGE CAPITAL LETTER ZHA
1489							"\xF0\x90\xB2\x80" => "\xF0\x90\xB3\x80", # OLD HUNGARIAN CAPITAL LETTER A
1490							"\xF0\x90\xB2\x81" => "\xF0\x90\xB3\x81", # OLD HUNGARIAN CAPITAL LETTER AA
1491							"\xF0\x90\xB2\x82" => "\xF0\x90\xB3\x82", # OLD HUNGARIAN CAPITAL LETTER EB
1492							"\xF0\x90\xB2\x83" => "\xF0\x90\xB3\x83", # OLD HUNGARIAN CAPITAL LETTER AMB
1493							"\xF0\x90\xB2\x84" => "\xF0\x90\xB3\x84", # OLD HUNGARIAN CAPITAL LETTER EC
1494							"\xF0\x90\xB2\x85" => "\xF0\x90\xB3\x85", # OLD HUNGARIAN CAPITAL LETTER ENC
1495							"\xF0\x90\xB2\x86" => "\xF0\x90\xB3\x86", # OLD HUNGARIAN CAPITAL LETTER ECS
1496							"\xF0\x90\xB2\x87" => "\xF0\x90\xB3\x87", # OLD HUNGARIAN CAPITAL LETTER ED
1497							"\xF0\x90\xB2\x88" => "\xF0\x90\xB3\x88", # OLD HUNGARIAN CAPITAL LETTER AND
1498							"\xF0\x90\xB2\x89" => "\xF0\x90\xB3\x89", # OLD HUNGARIAN CAPITAL LETTER E
1499							"\xF0\x90\xB2\x8A" => "\xF0\x90\xB3\x8A", # OLD HUNGARIAN CAPITAL LETTER CLOSE E
1500							"\xF0\x90\xB2\x8B" => "\xF0\x90\xB3\x8B", # OLD HUNGARIAN CAPITAL LETTER EE
1501							"\xF0\x90\xB2\x8C" => "\xF0\x90\xB3\x8C", # OLD HUNGARIAN CAPITAL LETTER EF
1502							"\xF0\x90\xB2\x8D" => "\xF0\x90\xB3\x8D", # OLD HUNGARIAN CAPITAL LETTER EG
1503							"\xF0\x90\xB2\x8E" => "\xF0\x90\xB3\x8E", # OLD HUNGARIAN CAPITAL LETTER EGY
1504							"\xF0\x90\xB2\x8F" => "\xF0\x90\xB3\x8F", # OLD HUNGARIAN CAPITAL LETTER EH
1505							"\xF0\x90\xB2\x90" => "\xF0\x90\xB3\x90", # OLD HUNGARIAN CAPITAL LETTER I
1506							"\xF0\x90\xB2\x91" => "\xF0\x90\xB3\x91", # OLD HUNGARIAN CAPITAL LETTER II
1507							"\xF0\x90\xB2\x92" => "\xF0\x90\xB3\x92", # OLD HUNGARIAN CAPITAL LETTER EJ
1508							"\xF0\x90\xB2\x93" => "\xF0\x90\xB3\x93", # OLD HUNGARIAN CAPITAL LETTER EK
1509							"\xF0\x90\xB2\x94" => "\xF0\x90\xB3\x94", # OLD HUNGARIAN CAPITAL LETTER AK
1510							"\xF0\x90\xB2\x95" => "\xF0\x90\xB3\x95", # OLD HUNGARIAN CAPITAL LETTER UNK
1511							"\xF0\x90\xB2\x96" => "\xF0\x90\xB3\x96", # OLD HUNGARIAN CAPITAL LETTER EL
1512							"\xF0\x90\xB2\x97" => "\xF0\x90\xB3\x97", # OLD HUNGARIAN CAPITAL LETTER ELY
1513							"\xF0\x90\xB2\x98" => "\xF0\x90\xB3\x98", # OLD HUNGARIAN CAPITAL LETTER EM
1514							"\xF0\x90\xB2\x99" => "\xF0\x90\xB3\x99", # OLD HUNGARIAN CAPITAL LETTER EN
1515							"\xF0\x90\xB2\x9A" => "\xF0\x90\xB3\x9A", # OLD HUNGARIAN CAPITAL LETTER ENY
1516							"\xF0\x90\xB2\x9B" => "\xF0\x90\xB3\x9B", # OLD HUNGARIAN CAPITAL LETTER O
1517							"\xF0\x90\xB2\x9C" => "\xF0\x90\xB3\x9C", # OLD HUNGARIAN CAPITAL LETTER OO
1518							"\xF0\x90\xB2\x9D" => "\xF0\x90\xB3\x9D", # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE
1519							"\xF0\x90\xB2\x9E" => "\xF0\x90\xB3\x9E", # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE
1520							"\xF0\x90\xB2\x9F" => "\xF0\x90\xB3\x9F", # OLD HUNGARIAN CAPITAL LETTER OEE
1521							"\xF0\x90\xB2\xA0" => "\xF0\x90\xB3\xA0", # OLD HUNGARIAN CAPITAL LETTER EP
1522							"\xF0\x90\xB2\xA1" => "\xF0\x90\xB3\xA1", # OLD HUNGARIAN CAPITAL LETTER EMP
1523							"\xF0\x90\xB2\xA2" => "\xF0\x90\xB3\xA2", # OLD HUNGARIAN CAPITAL LETTER ER
1524							"\xF0\x90\xB2\xA3" => "\xF0\x90\xB3\xA3", # OLD HUNGARIAN CAPITAL LETTER SHORT ER
1525							"\xF0\x90\xB2\xA4" => "\xF0\x90\xB3\xA4", # OLD HUNGARIAN CAPITAL LETTER ES
1526							"\xF0\x90\xB2\xA5" => "\xF0\x90\xB3\xA5", # OLD HUNGARIAN CAPITAL LETTER ESZ
1527							"\xF0\x90\xB2\xA6" => "\xF0\x90\xB3\xA6", # OLD HUNGARIAN CAPITAL LETTER ET
1528							"\xF0\x90\xB2\xA7" => "\xF0\x90\xB3\xA7", # OLD HUNGARIAN CAPITAL LETTER ENT
1529							"\xF0\x90\xB2\xA8" => "\xF0\x90\xB3\xA8", # OLD HUNGARIAN CAPITAL LETTER ETY
1530							"\xF0\x90\xB2\xA9" => "\xF0\x90\xB3\xA9", # OLD HUNGARIAN CAPITAL LETTER ECH
1531							"\xF0\x90\xB2\xAA" => "\xF0\x90\xB3\xAA", # OLD HUNGARIAN CAPITAL LETTER U
1532							"\xF0\x90\xB2\xAB" => "\xF0\x90\xB3\xAB", # OLD HUNGARIAN CAPITAL LETTER UU
1533							"\xF0\x90\xB2\xAC" => "\xF0\x90\xB3\xAC", # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE
1534							"\xF0\x90\xB2\xAD" => "\xF0\x90\xB3\xAD", # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE
1535							"\xF0\x90\xB2\xAE" => "\xF0\x90\xB3\xAE", # OLD HUNGARIAN CAPITAL LETTER EV
1536							"\xF0\x90\xB2\xAF" => "\xF0\x90\xB3\xAF", # OLD HUNGARIAN CAPITAL LETTER EZ
1537							"\xF0\x90\xB2\xB0" => "\xF0\x90\xB3\xB0", # OLD HUNGARIAN CAPITAL LETTER EZS
1538							"\xF0\x90\xB2\xB1" => "\xF0\x90\xB3\xB1", # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN
1539							"\xF0\x90\xB2\xB2" => "\xF0\x90\xB3\xB2", # OLD HUNGARIAN CAPITAL LETTER US
1540							"\xF0\x91\xA2\xA0" => "\xF0\x91\xA3\x80", # WARANG CITI CAPITAL LETTER NGAA
1541							"\xF0\x91\xA2\xA1" => "\xF0\x91\xA3\x81", # WARANG CITI CAPITAL LETTER A
1542							"\xF0\x91\xA2\xA2" => "\xF0\x91\xA3\x82", # WARANG CITI CAPITAL LETTER WI
1543							"\xF0\x91\xA2\xA3" => "\xF0\x91\xA3\x83", # WARANG CITI CAPITAL LETTER YU
1544							"\xF0\x91\xA2\xA4" => "\xF0\x91\xA3\x84", # WARANG CITI CAPITAL LETTER YA
1545							"\xF0\x91\xA2\xA5" => "\xF0\x91\xA3\x85", # WARANG CITI CAPITAL LETTER YO
1546							"\xF0\x91\xA2\xA6" => "\xF0\x91\xA3\x86", # WARANG CITI CAPITAL LETTER II
1547							"\xF0\x91\xA2\xA7" => "\xF0\x91\xA3\x87", # WARANG CITI CAPITAL LETTER UU
1548							"\xF0\x91\xA2\xA8" => "\xF0\x91\xA3\x88", # WARANG CITI CAPITAL LETTER E
1549							"\xF0\x91\xA2\xA9" => "\xF0\x91\xA3\x89", # WARANG CITI CAPITAL LETTER O
1550							"\xF0\x91\xA2\xAA" => "\xF0\x91\xA3\x8A", # WARANG CITI CAPITAL LETTER ANG
1551							"\xF0\x91\xA2\xAB" => "\xF0\x91\xA3\x8B", # WARANG CITI CAPITAL LETTER GA
1552							"\xF0\x91\xA2\xAC" => "\xF0\x91\xA3\x8C", # WARANG CITI CAPITAL LETTER KO
1553							"\xF0\x91\xA2\xAD" => "\xF0\x91\xA3\x8D", # WARANG CITI CAPITAL LETTER ENY
1554							"\xF0\x91\xA2\xAE" => "\xF0\x91\xA3\x8E", # WARANG CITI CAPITAL LETTER YUJ
1555							"\xF0\x91\xA2\xAF" => "\xF0\x91\xA3\x8F", # WARANG CITI CAPITAL LETTER UC
1556							"\xF0\x91\xA2\xB0" => "\xF0\x91\xA3\x90", # WARANG CITI CAPITAL LETTER ENN
1557							"\xF0\x91\xA2\xB1" => "\xF0\x91\xA3\x91", # WARANG CITI CAPITAL LETTER ODD
1558							"\xF0\x91\xA2\xB2" => "\xF0\x91\xA3\x92", # WARANG CITI CAPITAL LETTER TTE
1559							"\xF0\x91\xA2\xB3" => "\xF0\x91\xA3\x93", # WARANG CITI CAPITAL LETTER NUNG
1560							"\xF0\x91\xA2\xB4" => "\xF0\x91\xA3\x94", # WARANG CITI CAPITAL LETTER DA
1561							"\xF0\x91\xA2\xB5" => "\xF0\x91\xA3\x95", # WARANG CITI CAPITAL LETTER AT
1562							"\xF0\x91\xA2\xB6" => "\xF0\x91\xA3\x96", # WARANG CITI CAPITAL LETTER AM
1563							"\xF0\x91\xA2\xB7" => "\xF0\x91\xA3\x97", # WARANG CITI CAPITAL LETTER BU
1564							"\xF0\x91\xA2\xB8" => "\xF0\x91\xA3\x98", # WARANG CITI CAPITAL LETTER PU
1565							"\xF0\x91\xA2\xB9" => "\xF0\x91\xA3\x99", # WARANG CITI CAPITAL LETTER HIYO
1566							"\xF0\x91\xA2\xBA" => "\xF0\x91\xA3\x9A", # WARANG CITI CAPITAL LETTER HOLO
1567							"\xF0\x91\xA2\xBB" => "\xF0\x91\xA3\x9B", # WARANG CITI CAPITAL LETTER HORR
1568							"\xF0\x91\xA2\xBC" => "\xF0\x91\xA3\x9C", # WARANG CITI CAPITAL LETTER HAR
1569							"\xF0\x91\xA2\xBD" => "\xF0\x91\xA3\x9D", # WARANG CITI CAPITAL LETTER SSUU
1570							"\xF0\x91\xA2\xBE" => "\xF0\x91\xA3\x9E", # WARANG CITI CAPITAL LETTER SII
1571							"\xF0\x91\xA2\xBF" => "\xF0\x91\xA3\x9F", # WARANG CITI CAPITAL LETTER VIYO
1572							"\xF0\x9E\xA4\x80" => "\xF0\x9E\xA4\xA2", # ADLAM CAPITAL LETTER ALIF
1573							"\xF0\x9E\xA4\x81" => "\xF0\x9E\xA4\xA3", # ADLAM CAPITAL LETTER DAALI
1574							"\xF0\x9E\xA4\x82" => "\xF0\x9E\xA4\xA4", # ADLAM CAPITAL LETTER LAAM
1575							"\xF0\x9E\xA4\x83" => "\xF0\x9E\xA4\xA5", # ADLAM CAPITAL LETTER MIIM
1576							"\xF0\x9E\xA4\x84" => "\xF0\x9E\xA4\xA6", # ADLAM CAPITAL LETTER BA
1577							"\xF0\x9E\xA4\x85" => "\xF0\x9E\xA4\xA7", # ADLAM CAPITAL LETTER SINNYIIYHE
1578							"\xF0\x9E\xA4\x86" => "\xF0\x9E\xA4\xA8", # ADLAM CAPITAL LETTER PE
1579							"\xF0\x9E\xA4\x87" => "\xF0\x9E\xA4\xA9", # ADLAM CAPITAL LETTER BHE
1580							"\xF0\x9E\xA4\x88" => "\xF0\x9E\xA4\xAA", # ADLAM CAPITAL LETTER RA
1581							"\xF0\x9E\xA4\x89" => "\xF0\x9E\xA4\xAB", # ADLAM CAPITAL LETTER E
1582							"\xF0\x9E\xA4\x8A" => "\xF0\x9E\xA4\xAC", # ADLAM CAPITAL LETTER FA
1583							"\xF0\x9E\xA4\x8B" => "\xF0\x9E\xA4\xAD", # ADLAM CAPITAL LETTER I
1584							"\xF0\x9E\xA4\x8C" => "\xF0\x9E\xA4\xAE", # ADLAM CAPITAL LETTER O
1585							"\xF0\x9E\xA4\x8D" => "\xF0\x9E\xA4\xAF", # ADLAM CAPITAL LETTER DHA
1586							"\xF0\x9E\xA4\x8E" => "\xF0\x9E\xA4\xB0", # ADLAM CAPITAL LETTER YHE
1587							"\xF0\x9E\xA4\x8F" => "\xF0\x9E\xA4\xB1", # ADLAM CAPITAL LETTER WAW
1588							"\xF0\x9E\xA4\x90" => "\xF0\x9E\xA4\xB2", # ADLAM CAPITAL LETTER NUN
1589							"\xF0\x9E\xA4\x91" => "\xF0\x9E\xA4\xB3", # ADLAM CAPITAL LETTER KAF
1590							"\xF0\x9E\xA4\x92" => "\xF0\x9E\xA4\xB4", # ADLAM CAPITAL LETTER YA
1591							"\xF0\x9E\xA4\x93" => "\xF0\x9E\xA4\xB5", # ADLAM CAPITAL LETTER U
1592							"\xF0\x9E\xA4\x94" => "\xF0\x9E\xA4\xB6", # ADLAM CAPITAL LETTER JIIM
1593							"\xF0\x9E\xA4\x95" => "\xF0\x9E\xA4\xB7", # ADLAM CAPITAL LETTER CHI
1594							"\xF0\x9E\xA4\x96" => "\xF0\x9E\xA4\xB8", # ADLAM CAPITAL LETTER HA
1595							"\xF0\x9E\xA4\x97" => "\xF0\x9E\xA4\xB9", # ADLAM CAPITAL LETTER QAAF
1596							"\xF0\x9E\xA4\x98" => "\xF0\x9E\xA4\xBA", # ADLAM CAPITAL LETTER GA
1597							"\xF0\x9E\xA4\x99" => "\xF0\x9E\xA4\xBB", # ADLAM CAPITAL LETTER NYA
1598							"\xF0\x9E\xA4\x9A" => "\xF0\x9E\xA4\xBC", # ADLAM CAPITAL LETTER TU
1599							"\xF0\x9E\xA4\x9B" => "\xF0\x9E\xA4\xBD", # ADLAM CAPITAL LETTER NHA
1600							"\xF0\x9E\xA4\x9C" => "\xF0\x9E\xA4\xBE", # ADLAM CAPITAL LETTER VA
1601							"\xF0\x9E\xA4\x9D" => "\xF0\x9E\xA4\xBF", # ADLAM CAPITAL LETTER KHA
1602							"\xF0\x9E\xA4\x9E" => "\xF0\x9E\xA5\x80", # ADLAM CAPITAL LETTER GBE
1603							"\xF0\x9E\xA4\x9F" => "\xF0\x9E\xA5\x81", # ADLAM CAPITAL LETTER ZAL
1604							"\xF0\x9E\xA4\xA0" => "\xF0\x9E\xA5\x82", # ADLAM CAPITAL LETTER KPO
1605							"\xF0\x9E\xA4\xA1" => "\xF0\x9E\xA5\x83", # ADLAM CAPITAL LETTER SHA
1606							);
1607							}
1608
1609							else {
1610							croak "Don't know my package name '@{[__PACKAGE__]}'";
1611							}
1612
1613							#
1614							# @ARGV wildcard globbing
1615							#
1616							sub import {
1617
1618	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
1619	0					0	my @argv = ();
1620	0					0	for (@ARGV) {
1621
1622							# has space
1623	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
1624	0	0				0	if (my @glob = Eutf2::glob(qq{"$_"})) {
1625	0					0	push @argv, @glob;
1626							}
1627							else {
1628	0					0	push @argv, $_;
1629							}
1630							}
1631
1632							# has wildcard metachar
1633							elsif (/\A (?:$q_char)? [?] /oxms) {
1634	0	0				0	if (my @glob = Eutf2::glob($_)) {
1635	0					0	push @argv, @glob;
1636							}
1637							else {
1638	0					0	push @argv, $_;
1639							}
1640							}
1641
1642							# no wildcard globbing
1643							else {
1644	0					0	push @argv, $_;
1645							}
1646							}
1647	0					0	@ARGV = @argv;
1648							}
1649
1650	0					0	*Char::ord = \&UTF2::ord;
1651	0					0	*Char::ord_ = \&UTF2::ord_;
1652	0					0	*Char::reverse = \&UTF2::reverse;
1653	0					0	*Char::getc = \&UTF2::getc;
1654	0					0	*Char::length = \&UTF2::length;
1655	0					0	*Char::substr = \&UTF2::substr;
1656	0					0	*Char::index = \&UTF2::index;
1657	0					0	*Char::rindex = \&UTF2::rindex;
1658	0					0	*Char::eval = \&UTF2::eval;
1659	0					0	*Char::escape = \&UTF2::escape;
1660	0					0	*Char::escape_token = \&UTF2::escape_token;
1661	0					0	*Char::escape_script = \&UTF2::escape_script;
1662							}
1663
1664							# P.230 Care with Prototypes
1665							# in Chapter 6: Subroutines
1666							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1667							#
1668							# If you aren't careful, you can get yourself into trouble with prototypes.
1669							# But if you are careful, you can do a lot of neat things with them. This is
1670							# all very powerful, of course, and should only be used in moderation to make
1671							# the world a better place.
1672
1673							# P.332 Care with Prototypes
1674							# in Chapter 7: Subroutines
1675							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1676							#
1677							# If you aren't careful, you can get yourself into trouble with prototypes.
1678							# But if you are careful, you can do a lot of neat things with them. This is
1679							# all very powerful, of course, and should only be used in moderation to make
1680							# the world a better place.
1681
1682							#
1683							# Prototypes of subroutines
1684							#
1685				0			sub unimport {}
1686							sub Eutf2::split(;$$$);
1687							sub Eutf2::tr($$$$;$);
1688							sub Eutf2::chop(@);
1689							sub Eutf2::index($$;$);
1690							sub Eutf2::rindex($$;$);
1691							sub Eutf2::lcfirst(@);
1692							sub Eutf2::lcfirst_();
1693							sub Eutf2::lc(@);
1694							sub Eutf2::lc_();
1695							sub Eutf2::ucfirst(@);
1696							sub Eutf2::ucfirst_();
1697							sub Eutf2::uc(@);
1698							sub Eutf2::uc_();
1699							sub Eutf2::fc(@);
1700							sub Eutf2::fc_();
1701							sub Eutf2::ignorecase;
1702							sub Eutf2::classic_character_class;
1703							sub Eutf2::capture;
1704							sub Eutf2::chr(;$);
1705							sub Eutf2::chr_();
1706							sub Eutf2::glob($);
1707							sub Eutf2::glob_();
1708
1709							sub UTF2::ord(;$);
1710							sub UTF2::ord_();
1711							sub UTF2::reverse(@);
1712							sub UTF2::getc(;*@);
1713							sub UTF2::length(;$);
1714							sub UTF2::substr($$;$$);
1715							sub UTF2::index($$;$);
1716							sub UTF2::rindex($$;$);
1717							sub UTF2::escape(;$);
1718
1719							#
1720							# Regexp work
1721							#
1722	302			302		19511	BEGIN { CORE::eval q{ use vars qw(
	302			302		1699
	302					414
	302					89112
1723							$UTF2::re_a
1724							$UTF2::re_t
1725							$UTF2::re_n
1726							$UTF2::re_r
1727							) } }
1728
1729							#
1730							# Character class
1731							#
1732	302			302		17688	BEGIN { CORE::eval q{ use vars qw(
	302			302		1258
	302					394
	302					4506295
1733							$dot
1734							$dot_s
1735							$eD
1736							$eS
1737							$eW
1738							$eH
1739							$eV
1740							$eR
1741							$eN
1742							$not_alnum
1743							$not_alpha
1744							$not_ascii
1745							$not_blank
1746							$not_cntrl
1747							$not_digit
1748							$not_graph
1749							$not_lower
1750							$not_lower_i
1751							$not_print
1752							$not_punct
1753							$not_space
1754							$not_upper
1755							$not_upper_i
1756							$not_word
1757							$not_xdigit
1758							$eb
1759							$eB
1760							) } }
1761
1762							${Eutf2::dot} = qr{(?>[^\x80-\xFF\x0A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1763							${Eutf2::dot_s} = qr{(?>[^\x80-\xFF]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1764							${Eutf2::eD} = qr{(?>[^\x80-\xFF0-9]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1765
1766							# Vertical tabs are now whitespace
1767							# \s in a regex now matches a vertical tab in all circumstances.
1768							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1769							# ${Eutf2::eS} = qr{(?>[^\x80-\xFF\x09\x0A \x0C\x0D\x20]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1770							# ${Eutf2::eS} = qr{(?>[^\x80-\xFF\x09\x0A\x0B\x0C\x0D\x20]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1771							${Eutf2::eS} = qr{(?>[^\x80-\xFF\s]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1772
1773							${Eutf2::eW} = qr{(?>[^\x80-\xFF0-9A-Z_a-z]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1774							${Eutf2::eH} = qr{(?>[^\x80-\xFF\x09\x20]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1775							${Eutf2::eV} = qr{(?>[^\x80-\xFF\x0A\x0B\x0C\x0D]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1776							${Eutf2::eR} = qr{(?>\x0D\x0A\|[\x0A\x0D])};
1777							${Eutf2::eN} = qr{(?>[^\x80-\xFF\x0A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1778							${Eutf2::not_alnum} = qr{(?>[^\x80-\xFF\x30-\x39\x41-\x5A\x61-\x7A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1779							${Eutf2::not_alpha} = qr{(?>[^\x80-\xFF\x41-\x5A\x61-\x7A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1780							${Eutf2::not_ascii} = qr{(?>[^\x80-\xFF\x00-\x7F]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1781							${Eutf2::not_blank} = qr{(?>[^\x80-\xFF\x09\x20]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1782							${Eutf2::not_cntrl} = qr{(?>[^\x80-\xFF\x00-\x1F\x7F]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1783							${Eutf2::not_digit} = qr{(?>[^\x80-\xFF\x30-\x39]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1784							${Eutf2::not_graph} = qr{(?>[^\x80-\xFF\x21-\x7F]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1785							${Eutf2::not_lower} = qr{(?>[^\x80-\xFF\x61-\x7A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1786							${Eutf2::not_lower_i} = qr{(?>[^\x80-\xFF\x41-\x5A\x61-\x7A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])}; # Perl 5.16 compatible
1787							# ${Eutf2::not_lower_i} = qr{(?>[^\x80-\xFF]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])}; # older Perl compatible
1788							${Eutf2::not_print} = qr{(?>[^\x80-\xFF\x20-\x7F]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1789							${Eutf2::not_punct} = qr{(?>[^\x80-\xFF\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1790							${Eutf2::not_space} = qr{(?>[^\x80-\xFF\s\x0B]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1791							${Eutf2::not_upper} = qr{(?>[^\x80-\xFF\x41-\x5A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1792							${Eutf2::not_upper_i} = qr{(?>[^\x80-\xFF\x41-\x5A\x61-\x7A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])}; # Perl 5.16 compatible
1793							# ${Eutf2::not_upper_i} = qr{(?>[^\x80-\xFF]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])}; # older Perl compatible
1794							${Eutf2::not_word} = qr{(?>[^\x80-\xFF\x30-\x39\x41-\x5A\x5F\x61-\x7A]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1795							${Eutf2::not_xdigit} = qr{(?>[^\x80-\xFF\x30-\x39\x41-\x46\x61-\x66]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])};
1796							${Eutf2::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
1797							${Eutf2::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
1798
1799							# avoid: Name "Eutf2::foo" used only once: possible typo at here.
1800							${Eutf2::dot} = ${Eutf2::dot};
1801							${Eutf2::dot_s} = ${Eutf2::dot_s};
1802							${Eutf2::eD} = ${Eutf2::eD};
1803							${Eutf2::eS} = ${Eutf2::eS};
1804							${Eutf2::eW} = ${Eutf2::eW};
1805							${Eutf2::eH} = ${Eutf2::eH};
1806							${Eutf2::eV} = ${Eutf2::eV};
1807							${Eutf2::eR} = ${Eutf2::eR};
1808							${Eutf2::eN} = ${Eutf2::eN};
1809							${Eutf2::not_alnum} = ${Eutf2::not_alnum};
1810							${Eutf2::not_alpha} = ${Eutf2::not_alpha};
1811							${Eutf2::not_ascii} = ${Eutf2::not_ascii};
1812							${Eutf2::not_blank} = ${Eutf2::not_blank};
1813							${Eutf2::not_cntrl} = ${Eutf2::not_cntrl};
1814							${Eutf2::not_digit} = ${Eutf2::not_digit};
1815							${Eutf2::not_graph} = ${Eutf2::not_graph};
1816							${Eutf2::not_lower} = ${Eutf2::not_lower};
1817							${Eutf2::not_lower_i} = ${Eutf2::not_lower_i};
1818							${Eutf2::not_print} = ${Eutf2::not_print};
1819							${Eutf2::not_punct} = ${Eutf2::not_punct};
1820							${Eutf2::not_space} = ${Eutf2::not_space};
1821							${Eutf2::not_upper} = ${Eutf2::not_upper};
1822							${Eutf2::not_upper_i} = ${Eutf2::not_upper_i};
1823							${Eutf2::not_word} = ${Eutf2::not_word};
1824							${Eutf2::not_xdigit} = ${Eutf2::not_xdigit};
1825							${Eutf2::eb} = ${Eutf2::eb};
1826							${Eutf2::eB} = ${Eutf2::eB};
1827
1828							#
1829							# UTF-8 split
1830							#
1831							sub Eutf2::split(;$$$) {
1832
1833							# P.794 29.2.161. split
1834							# in Chapter 29: Functions
1835							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1836
1837							# P.951 split
1838							# in Chapter 27: Functions
1839							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1840
1841	0			0	0	0	my $pattern = $_[0];
1842	0					0	my $string = $_[1];
1843	0					0	my $limit = $_[2];
1844
1845							# if $pattern is also omitted or is the literal space, " "
1846	0	0				0	if (not defined $pattern) {
1847	0					0	$pattern = ' ';
1848							}
1849
1850							# if $string is omitted, the function splits the $_ string
1851	0	0				0	if (not defined $string) {
1852	0	0				0	if (defined $_) {
1853	0					0	$string = $_;
1854							}
1855							else {
1856	0					0	$string = '';
1857							}
1858							}
1859
1860	0					0	my @split = ();
1861
1862							# when string is empty
1863	0	0				0	if ($string eq '') {
		0
1864
1865							# resulting list value in list context
1866	0	0				0	if (wantarray) {
1867	0					0	return @split;
1868							}
1869
1870							# count of substrings in scalar context
1871							else {
1872	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
1873	0					0	@_ = @split;
1874	0					0	return scalar @_;
1875							}
1876							}
1877
1878							# split's first argument is more consistently interpreted
1879							#
1880							# After some changes earlier in v5.17, split's behavior has been simplified:
1881							# if the PATTERN argument evaluates to a string containing one space, it is
1882							# treated the way that a literal string containing one space once was.
1883							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
1884
1885							# if $pattern is also omitted or is the literal space, " ", the function splits
1886							# on whitespace, /\s+/, after skipping any leading whitespace
1887							# (and so on)
1888
1889							elsif ($pattern eq ' ') {
1890	0	0				0	if (not defined $limit) {
1891	0					0	return CORE::split(' ', $string);
1892							}
1893							else {
1894	0					0	return CORE::split(' ', $string, $limit);
1895							}
1896							}
1897
1898							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
1899	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
1900
1901							# a pattern capable of matching either the null string or something longer than the
1902							# null string will split the value of $string into separate characters wherever it
1903							# matches the null string between characters
1904							# (and so on)
1905
1906	0	0				0	if ('' =~ / \A $pattern \z /xms) {
1907	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
1908	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
1909
1910							# P.1024 Appendix W.10 Multibyte Processing
1911							# of ISBN 1-56592-224-7 CJKV Information Processing
1912							# (and so on)
1913
1914							# the //m modifier is assumed when you split on the pattern /^/
1915							# (and so on)
1916
1917							# V
1918	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
1919
1920							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
1921							# is included in the resulting list, interspersed with the fields that are ordinarily returned
1922							# (and so on)
1923
1924	0					0	local $@;
1925	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
1926	0					0	push @split, CORE::eval('$' . $digit);
1927							}
1928							}
1929							}
1930
1931							else {
1932	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
1933
1934							# V
1935	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
1936	0					0	local $@;
1937	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
1938	0					0	push @split, CORE::eval('$' . $digit);
1939							}
1940							}
1941							}
1942							}
1943
1944							elsif ($limit > 0) {
1945	0	0				0	if ('' =~ / \A $pattern \z /xms) {
1946	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
1947	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
1948
1949							# V
1950	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
1951	0					0	local $@;
1952	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
1953	0					0	push @split, CORE::eval('$' . $digit);
1954							}
1955							}
1956							}
1957							}
1958							else {
1959	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
1960	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
1961
1962							# V
1963	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
1964	0					0	local $@;
1965	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
1966	0					0	push @split, CORE::eval('$' . $digit);
1967							}
1968							}
1969							}
1970							}
1971							}
1972
1973	0	0				0	if (CORE::length($string) > 0) {
1974	0					0	push @split, $string;
1975							}
1976
1977							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
1978	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
1979	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
1980	0					0	pop @split;
1981							}
1982							}
1983
1984							# resulting list value in list context
1985	0	0				0	if (wantarray) {
1986	0					0	return @split;
1987							}
1988
1989							# count of substrings in scalar context
1990							else {
1991	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
1992	0					0	@_ = @split;
1993	0					0	return scalar @_;
1994							}
1995							}
1996
1997							#
1998							# get last subexpression offsets
1999							#
2000							sub _last_subexpression_offsets {
2001	0			0		0	my $pattern = $_[0];
2002
2003							# remove comment
2004	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
2005
2006	0					0	my $modifier = '';
2007	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
2008	0					0	$modifier = $1;
2009	0					0	$modifier =~ s/-[A-Za-z]*//;
2010							}
2011
2012							# with /x modifier
2013	0					0	my @char = ();
2014	0	0				0	if ($modifier =~ /x/oxms) {
2015	0					0	@char = $pattern =~ /\G((?>
2016							[^\x80-\xFF\\\#\[\(]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
2017							\\ $q_char \|
2018							\# (?>[^\n]*) $ \|
2019							\[ (?>(?:[^\x80-\xFF\\\]]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]\|\\\\\|\\\]\|$q_char)+) \] \|
2020							\(\? \|
2021							$q_char
2022							))/oxmsg;
2023							}
2024
2025							# without /x modifier
2026							else {
2027	0					0	@char = $pattern =~ /\G((?>
2028							[^\x80-\xFF\\\[\(]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
2029							\\ $q_char \|
2030							\[ (?>(?:[^\x80-\xFF\\\]]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]\|\\\\\|\\\]\|$q_char)+) \] \|
2031							\(\? \|
2032							$q_char
2033							))/oxmsg;
2034							}
2035
2036	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
2037							}
2038
2039							#
2040							# UTF-8 transliteration (tr///)
2041							#
2042							sub Eutf2::tr($$$$;$) {
2043
2044	0			0	0	0	my $bind_operator = $_[1];
2045	0					0	my $searchlist = $_[2];
2046	0					0	my $replacementlist = $_[3];
2047	0		0			0	my $modifier = $_[4] \|\| '';
2048
2049	0	0				0	if ($modifier =~ /r/oxms) {
2050	0	0				0	if ($bind_operator =~ / !~ /oxms) {
2051	0					0	croak "Using !~ with tr///r doesn't make sense";
2052							}
2053							}
2054
2055	0					0	my @char = $_[0] =~ /\G (?>$q_char) /oxmsg;
2056	0					0	my @searchlist = _charlist_tr($searchlist);
2057	0					0	my @replacementlist = _charlist_tr($replacementlist);
2058
2059	0					0	my %tr = ();
2060	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
2061	0	0				0	if (not exists $tr{$searchlist[$i]}) {
2062	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
2063	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
2064							}
2065							elsif ($modifier =~ /d/oxms) {
2066	0					0	$tr{$searchlist[$i]} = '';
2067							}
2068							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
2069	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
2070							}
2071							else {
2072	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
2073							}
2074							}
2075							}
2076
2077	0					0	my $tr = 0;
2078	0					0	my $replaced = '';
2079	0	0				0	if ($modifier =~ /c/oxms) {
2080	0					0	while (defined(my $char = shift @char)) {
2081	0	0				0	if (not exists $tr{$char}) {
2082	0	0				0	if (defined $replacementlist[0]) {
2083	0					0	$replaced .= $replacementlist[0];
2084							}
2085	0					0	$tr++;
2086	0	0				0	if ($modifier =~ /s/oxms) {
2087	0		0			0	while (@char and (not exists $tr{$char[0]})) {
2088	0					0	shift @char;
2089	0					0	$tr++;
2090							}
2091							}
2092							}
2093							else {
2094	0					0	$replaced .= $char;
2095							}
2096							}
2097							}
2098							else {
2099	0					0	while (defined(my $char = shift @char)) {
2100	0	0				0	if (exists $tr{$char}) {
2101	0					0	$replaced .= $tr{$char};
2102	0					0	$tr++;
2103	0	0				0	if ($modifier =~ /s/oxms) {
2104	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
2105	0					0	shift @char;
2106	0					0	$tr++;
2107							}
2108							}
2109							}
2110							else {
2111	0					0	$replaced .= $char;
2112							}
2113							}
2114							}
2115
2116	0	0				0	if ($modifier =~ /r/oxms) {
2117	0					0	return $replaced;
2118							}
2119							else {
2120	0					0	$_[0] = $replaced;
2121	0	0				0	if ($bind_operator =~ / !~ /oxms) {
2122	0					0	return not $tr;
2123							}
2124							else {
2125	0					0	return $tr;
2126							}
2127							}
2128							}
2129
2130							#
2131							# UTF-8 chop
2132							#
2133							sub Eutf2::chop(@) {
2134
2135	0			0	0	0	my $chop;
2136	0	0				0	if (@_ == 0) {
2137	0					0	my @char = /\G (?>$q_char) /oxmsg;
2138	0					0	$chop = pop @char;
2139	0					0	$_ = join '', @char;
2140							}
2141							else {
2142	0					0	for (@_) {
2143	0					0	my @char = /\G (?>$q_char) /oxmsg;
2144	0					0	$chop = pop @char;
2145	0					0	$_ = join '', @char;
2146							}
2147							}
2148	0					0	return $chop;
2149							}
2150
2151							#
2152							# UTF-8 index by octet
2153							#
2154							sub Eutf2::index($$;$) {
2155
2156	0			0	1	0	my($str,$substr,$position) = @_;
2157	0		0			0	$position \|\|= 0;
2158	0					0	my $pos = 0;
2159
2160	0					0	while ($pos < CORE::length($str)) {
2161	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
2162	0	0				0	if ($pos >= $position) {
2163	0					0	return $pos;
2164							}
2165							}
2166	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
2167	0					0	$pos += CORE::length($1);
2168							}
2169							else {
2170	0					0	$pos += 1;
2171							}
2172							}
2173	0					0	return -1;
2174							}
2175
2176							#
2177							# UTF-8 reverse index
2178							#
2179							sub Eutf2::rindex($$;$) {
2180
2181	0			0	0	0	my($str,$substr,$position) = @_;
2182	0		0			0	$position \|\|= CORE::length($str) - 1;
2183	0					0	my $pos = 0;
2184	0					0	my $rindex = -1;
2185
2186	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
2187	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
2188	0					0	$rindex = $pos;
2189							}
2190	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
2191	0					0	$pos += CORE::length($1);
2192							}
2193							else {
2194	0					0	$pos += 1;
2195							}
2196							}
2197	0					0	return $rindex;
2198							}
2199
2200							#
2201							# UTF-8 lower case first with parameter
2202							#
2203							sub Eutf2::lcfirst(@) {
2204	0	0		0	0	0	if (@_) {
2205	0					0	my $s = shift @_;
2206	0	0	0			0	if (@_ and wantarray) {
2207	0					0	return Eutf2::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
2208							}
2209							else {
2210	0					0	return Eutf2::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
2211							}
2212							}
2213							else {
2214	0					0	return Eutf2::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
2215							}
2216							}
2217
2218							#
2219							# UTF-8 lower case first without parameter
2220							#
2221							sub Eutf2::lcfirst_() {
2222	0			0	0	0	return Eutf2::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
2223							}
2224
2225							#
2226							# UTF-8 lower case with parameter
2227							#
2228							sub Eutf2::lc(@) {
2229	0	0		0	0	0	if (@_) {
2230	0					0	my $s = shift @_;
2231	0	0	0			0	if (@_ and wantarray) {
2232	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
2233							}
2234							else {
2235	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
2236							}
2237							}
2238							else {
2239	0					0	return Eutf2::lc_();
2240							}
2241							}
2242
2243							#
2244							# UTF-8 lower case without parameter
2245							#
2246							sub Eutf2::lc_() {
2247	0			0	0	0	my $s = $_;
2248	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
2249							}
2250
2251							#
2252							# UTF-8 upper case first with parameter
2253							#
2254							sub Eutf2::ucfirst(@) {
2255	0	0		0	0	0	if (@_) {
2256	0					0	my $s = shift @_;
2257	0	0	0			0	if (@_ and wantarray) {
2258	0					0	return Eutf2::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
2259							}
2260							else {
2261	0					0	return Eutf2::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
2262							}
2263							}
2264							else {
2265	0					0	return Eutf2::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
2266							}
2267							}
2268
2269							#
2270							# UTF-8 upper case first without parameter
2271							#
2272							sub Eutf2::ucfirst_() {
2273	0			0	0	0	return Eutf2::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
2274							}
2275
2276							#
2277							# UTF-8 upper case with parameter
2278							#
2279							sub Eutf2::uc(@) {
2280	2478	50		2478	0	2552	if (@_) {
2281	2478					1844	my $s = shift @_;
2282	2478	50	33			3848	if (@_ and wantarray) {
2283	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
2284							}
2285							else {
2286	2478	100				5566	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	2478					5814
2287							}
2288							}
2289							else {
2290	0					0	return Eutf2::uc_();
2291							}
2292							}
2293
2294							#
2295							# UTF-8 upper case without parameter
2296							#
2297							sub Eutf2::uc_() {
2298	0			0	0	0	my $s = $_;
2299	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
2300							}
2301
2302							#
2303							# UTF-8 fold case with parameter
2304							#
2305							sub Eutf2::fc(@) {
2306	2525	50		2525	0	2579	if (@_) {
2307	2525					1751	my $s = shift @_;
2308	2525	50	33			3824	if (@_ and wantarray) {
2309	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
2310							}
2311							else {
2312	2525	100				5058	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	2525					6439
2313							}
2314							}
2315							else {
2316	0					0	return Eutf2::fc_();
2317							}
2318							}
2319
2320							#
2321							# UTF-8 fold case without parameter
2322							#
2323							sub Eutf2::fc_() {
2324	0			0	0	0	my $s = $_;
2325	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
2326							}
2327
2328							#
2329							# UTF-8 regexp capture
2330							#
2331							{
2332							sub Eutf2::capture {
2333	0			0	1	0	return $_[0];
2334							}
2335							}
2336
2337							#
2338							# UTF-8 regexp ignore case modifier
2339							#
2340							sub Eutf2::ignorecase {
2341
2342	0			0	0	0	my @string = @_;
2343	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
2344
2345							# ignore case of $scalar or @array
2346	0					0	for my $string (@string) {
2347
2348							# split regexp
2349	0					0	my @char = $string =~ /\G (?>\[\^\|\\$q_char\|$q_char) /oxmsg;
2350
2351							# unescape character
2352	0					0	for (my $i=0; $i <= $#char; $i++) {
2353	0	0				0	next if not defined $char[$i];
2354
2355							# open character class [...]
2356	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
2357	0					0	my $left = $i;
2358
2359							# [] make die "unmatched [] in regexp ...\n"
2360
2361	0	0				0	if ($char[$i+1] eq ']') {
2362	0					0	$i++;
2363							}
2364
2365	0					0	while (1) {
2366	0	0				0	if (++$i > $#char) {
2367	0					0	croak "Unmatched [] in regexp";
2368							}
2369	0	0				0	if ($char[$i] eq ']') {
2370	0					0	my $right = $i;
2371	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
2372
2373							# escape character
2374	0					0	for my $char (@charlist) {
2375	0	0				0	if (0) {
2376							}
2377
2378	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
2379	0					0	$char = '\\' . $char;
2380							}
2381							}
2382
2383							# [...]
2384	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
2385
2386	0					0	$i = $left;
2387	0					0	last;
2388							}
2389							}
2390							}
2391
2392							# open character class [^...]
2393							elsif ($char[$i] eq '[^') {
2394	0					0	my $left = $i;
2395
2396							# [^] make die "unmatched [] in regexp ...\n"
2397
2398	0	0				0	if ($char[$i+1] eq ']') {
2399	0					0	$i++;
2400							}
2401
2402	0					0	while (1) {
2403	0	0				0	if (++$i > $#char) {
2404	0					0	croak "Unmatched [] in regexp";
2405							}
2406	0	0				0	if ($char[$i] eq ']') {
2407	0					0	my $right = $i;
2408	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
2409
2410							# escape character
2411	0					0	for my $char (@charlist) {
2412	0	0				0	if (0) {
2413							}
2414
2415	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
2416	0					0	$char = '\\' . $char;
2417							}
2418							}
2419
2420							# [^...]
2421	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
2422
2423	0					0	$i = $left;
2424	0					0	last;
2425							}
2426							}
2427							}
2428
2429							# rewrite classic character class or escape character
2430							elsif (my $char = classic_character_class($char[$i])) {
2431	0					0	$char[$i] = $char;
2432							}
2433
2434							# with /i modifier
2435							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
2436	0					0	my $uc = Eutf2::uc($char[$i]);
2437	0					0	my $fc = Eutf2::fc($char[$i]);
2438	0	0				0	if ($uc ne $fc) {
2439	0	0				0	if (CORE::length($fc) == 1) {
2440	0					0	$char[$i] = '[' . $uc . $fc . ']';
2441							}
2442							else {
2443	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
2444							}
2445							}
2446							}
2447							}
2448
2449							# characterize
2450	0					0	for (my $i=0; $i <= $#char; $i++) {
2451	0	0				0	next if not defined $char[$i];
2452
2453	0	0				0	if (0) {
2454							}
2455
2456							# quote character before ? + * {
2457	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
2458	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
2459	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
2460							}
2461							}
2462							}
2463
2464	0					0	$string = join '', @char;
2465							}
2466
2467							# make regexp string
2468	0					0	return @string;
2469							}
2470
2471							#
2472							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
2473							#
2474							sub Eutf2::classic_character_class {
2475	2815			2815	0	2237	my($char) = @_;
2476
2477							return {
2478							'\D' => '${Eutf2::eD}',
2479							'\S' => '${Eutf2::eS}',
2480							'\W' => '${Eutf2::eW}',
2481							'\d' => '[0-9]',
2482
2483							# Before Perl 5.6, \s only matched the five whitespace characters
2484							# tab, newline, form-feed, carriage return, and the space character
2485							# itself, which, taken together, is the character class [\t\n\f\r ].
2486
2487							# Vertical tabs are now whitespace
2488							# \s in a regex now matches a vertical tab in all circumstances.
2489							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
2490							# \t \n \v \f \r space
2491							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
2492							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
2493							'\s' => '\s',
2494
2495							'\w' => '[0-9A-Z_a-z]',
2496							'\C' => '[\x00-\xFF]',
2497							'\X' => 'X',
2498
2499							# \h \v \H \V
2500
2501							# P.114 Character Class Shortcuts
2502							# in Chapter 7: In the World of Regular Expressions
2503							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
2504
2505							# P.357 13.2.3 Whitespace
2506							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
2507							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2508							#
2509							# 0x00009 CHARACTER TABULATION h s
2510							# 0x0000a LINE FEED (LF) vs
2511							# 0x0000b LINE TABULATION v
2512							# 0x0000c FORM FEED (FF) vs
2513							# 0x0000d CARRIAGE RETURN (CR) vs
2514							# 0x00020 SPACE h s
2515
2516							# P.196 Table 5-9. Alphanumeric regex metasymbols
2517							# in Chapter 5. Pattern Matching
2518							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2519
2520							# (and so on)
2521
2522							'\H' => '${Eutf2::eH}',
2523							'\V' => '${Eutf2::eV}',
2524							'\h' => '[\x09\x20]',
2525							'\v' => '[\x0A\x0B\x0C\x0D]',
2526							'\R' => '${Eutf2::eR}',
2527
2528							# \N
2529							#
2530							# http://perldoc.perl.org/perlre.html
2531							# Character Classes and other Special Escapes
2532							# Any character but \n (experimental). Not affected by /s modifier
2533
2534							'\N' => '${Eutf2::eN}',
2535
2536							# \b \B
2537
2538							# P.180 Boundaries: The \b and \B Assertions
2539							# in Chapter 5: Pattern Matching
2540							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2541
2542							# P.219 Boundaries: The \b and \B Assertions
2543							# in Chapter 5: Pattern Matching
2544							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2545
2546							# \b really means (?:(?<=\w)(?!\w)\|(?
2547							# or (?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))
2548							'\b' => '${Eutf2::eb}',
2549
2550							# \B really means (?:(?<=\w)(?=\w)\|(?
2551							# or (?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))
2552							'\B' => '${Eutf2::eB}',
2553
2554	2815		100			116547	}->{$char} \|\| '';
2555							}
2556
2557							#
2558							# prepare UTF-8 characters per length
2559							#
2560
2561							# 1 octet characters
2562							my @chars1 = ();
2563							sub chars1 {
2564	0	0		0	0	0	if (@chars1) {
2565	0					0	return @chars1;
2566							}
2567	0	0				0	if (exists $range_tr{1}) {
2568	0					0	my @ranges = @{ $range_tr{1} };
	0					0
2569	0					0	while (my @range = splice(@ranges,0,1)) {
2570	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2571	0					0	push @chars1, pack 'C', $oct0;
2572							}
2573							}
2574							}
2575	0					0	return @chars1;
2576							}
2577
2578							# 2 octets characters
2579							my @chars2 = ();
2580							sub chars2 {
2581	0	0		0	0	0	if (@chars2) {
2582	0					0	return @chars2;
2583							}
2584	0	0				0	if (exists $range_tr{2}) {
2585	0					0	my @ranges = @{ $range_tr{2} };
	0					0
2586	0					0	while (my @range = splice(@ranges,0,2)) {
2587	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2588	0					0	for my $oct1 (@{$range[1]}) {
	0					0
2589	0					0	push @chars2, pack 'CC', $oct0,$oct1;
2590							}
2591							}
2592							}
2593							}
2594	0					0	return @chars2;
2595							}
2596
2597							# 3 octets characters
2598							my @chars3 = ();
2599							sub chars3 {
2600	0	0		0	0	0	if (@chars3) {
2601	0					0	return @chars3;
2602							}
2603	0	0				0	if (exists $range_tr{3}) {
2604	0					0	my @ranges = @{ $range_tr{3} };
	0					0
2605	0					0	while (my @range = splice(@ranges,0,3)) {
2606	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2607	0					0	for my $oct1 (@{$range[1]}) {
	0					0
2608	0					0	for my $oct2 (@{$range[2]}) {
	0					0
2609	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
2610							}
2611							}
2612							}
2613							}
2614							}
2615	0					0	return @chars3;
2616							}
2617
2618							# 4 octets characters
2619							my @chars4 = ();
2620							sub chars4 {
2621	0	0		0	0	0	if (@chars4) {
2622	0					0	return @chars4;
2623							}
2624	0	0				0	if (exists $range_tr{4}) {
2625	0					0	my @ranges = @{ $range_tr{4} };
	0					0
2626	0					0	while (my @range = splice(@ranges,0,4)) {
2627	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2628	0					0	for my $oct1 (@{$range[1]}) {
	0					0
2629	0					0	for my $oct2 (@{$range[2]}) {
	0					0
2630	0					0	for my $oct3 (@{$range[3]}) {
	0					0
2631	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
2632							}
2633							}
2634							}
2635							}
2636							}
2637							}
2638	0					0	return @chars4;
2639							}
2640
2641							#
2642							# UTF-8 open character list for tr
2643							#
2644							sub _charlist_tr {
2645
2646	0			0		0	local $_ = shift @_;
2647
2648							# unescape character
2649	0					0	my @char = ();
2650	0					0	while (not /\G \z/oxmsgc) {
2651	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
2652	0					0	push @char, '\-';
2653							}
2654							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
2655	0					0	push @char, CORE::chr(oct $1);
2656							}
2657							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
2658	0					0	push @char, CORE::chr(hex $1);
2659							}
2660							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
2661	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
2662							}
2663							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
2664							push @char, {
2665							'\0' => "\0",
2666							'\n' => "\n",
2667							'\r' => "\r",
2668							'\t' => "\t",
2669							'\f' => "\f",
2670							'\b' => "\x08", # \b means backspace in character class
2671							'\a' => "\a",
2672							'\e' => "\e",
2673	0					0	}->{$1};
2674							}
2675							elsif (/\G \\ ($q_char) /oxmsgc) {
2676	0					0	push @char, $1;
2677							}
2678							elsif (/\G ($q_char) /oxmsgc) {
2679	0					0	push @char, $1;
2680							}
2681							}
2682
2683							# join separated multiple-octet
2684	0					0	@char = join('',@char) =~ /\G (?>\\-\|$q_char) /oxmsg;
2685
2686							# unescape '-'
2687	0					0	my @i = ();
2688	0					0	for my $i (0 .. $#char) {
2689	0	0				0	if ($char[$i] eq '\-') {
		0
2690	0					0	$char[$i] = '-';
2691							}
2692							elsif ($char[$i] eq '-') {
2693	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
2694	0					0	push @i, $i;
2695							}
2696							}
2697							}
2698
2699							# open character list (reverse for splice)
2700	0					0	for my $i (CORE::reverse @i) {
2701	0					0	my @range = ();
2702
2703							# range error
2704	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
2705	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2706							}
2707
2708							# range of multiple-octet code
2709	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
2710	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
2711	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
2712							}
2713							elsif (CORE::length($char[$i+1]) == 2) {
2714	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
2715	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
2716							}
2717							elsif (CORE::length($char[$i+1]) == 3) {
2718	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
2719	0					0	push @range, chars2();
2720	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
2721							}
2722							elsif (CORE::length($char[$i+1]) == 4) {
2723	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
2724	0					0	push @range, chars2();
2725	0					0	push @range, chars3();
2726	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
2727							}
2728							else {
2729	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2730							}
2731							}
2732							elsif (CORE::length($char[$i-1]) == 2) {
2733	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
2734	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
2735							}
2736							elsif (CORE::length($char[$i+1]) == 3) {
2737	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
2738	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
2739							}
2740							elsif (CORE::length($char[$i+1]) == 4) {
2741	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
2742	0					0	push @range, chars3();
2743	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
2744							}
2745							else {
2746	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2747							}
2748							}
2749							elsif (CORE::length($char[$i-1]) == 3) {
2750	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
2751	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
2752							}
2753							elsif (CORE::length($char[$i+1]) == 4) {
2754	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
2755	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
2756							}
2757							else {
2758	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2759							}
2760							}
2761							elsif (CORE::length($char[$i-1]) == 4) {
2762	0	0				0	if (CORE::length($char[$i+1]) == 4) {
2763	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
2764							}
2765							else {
2766	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2767							}
2768							}
2769							else {
2770	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2771							}
2772
2773	0					0	splice @char, $i-1, 3, @range;
2774							}
2775
2776	0					0	return @char;
2777							}
2778
2779							#
2780							# UTF-8 open character class
2781							#
2782							sub _cc {
2783	933	50		933		1555	if (scalar(@_) == 0) {
		100
		50
2784	0					0	die __FILE__, ": subroutine cc got no parameter.\n";
2785							}
2786							elsif (scalar(@_) == 1) {
2787	482					1194	return sprintf('\x%02X',$_[0]);
2788							}
2789							elsif (scalar(@_) == 2) {
2790	451	50				933	if ($_[0] > $_[1]) {
		100
		50
2791	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).\n";
2792							}
2793							elsif ($_[0] == $_[1]) {
2794	40					69	return sprintf('\x%02X',$_[0]);
2795							}
2796							elsif (($_[0]+1) == $_[1]) {
2797	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
2798							}
2799							else {
2800	411					1392	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
2801							}
2802							}
2803							else {
2804	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).\n";
	0					0
2805							}
2806							}
2807
2808							#
2809							# UTF-8 octet range
2810							#
2811							sub _octets {
2812	577			577		699	my $length = shift @_;
2813
2814	577	100				1063	if ($length == 1) {
		100
		50
		0
2815	406					1102	my($a1) = unpack 'C', $_[0];
2816	406					526	my($z1) = unpack 'C', $_[1];
2817
2818	406	50				675	if ($a1 > $z1) {
2819	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
2820							}
2821
2822	406	50				764	if ($a1 == $z1) {
		50
2823	0					0	return sprintf('\x%02X',$a1);
2824							}
2825							elsif (($a1+1) == $z1) {
2826	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
2827							}
2828							else {
2829	406					2387	return sprintf('\x%02X-\x%02X',$a1,$z1);
2830							}
2831							}
2832							elsif ($length == 2) {
2833	20					37	my($a1,$a2) = unpack 'CC', $_[0];
2834	20					27	my($z1,$z2) = unpack 'CC', $_[1];
2835	20					21	my($A1,$A2) = unpack 'CC', $_[2];
2836	20					23	my($Z1,$Z2) = unpack 'CC', $_[3];
2837
2838	20	50				45	if ($a1 == $z1) {
		50
2839							return (
2840							# 11111111 222222222222
2841							# A A Z
2842	0					0	_cc($a1) . _cc($a2,$z2), # a2-z2
2843							);
2844							}
2845							elsif (($a1+1) == $z1) {
2846							return (
2847							# 11111111111 222222222222
2848							# A Z A Z
2849	0					0	_cc($a1) . _cc($a2,$Z2), # a2-
2850							_cc( $z1) . _cc($A2,$z2), # -z2
2851							);
2852							}
2853							else {
2854							return (
2855							# 1111111111111111 222222222222
2856							# A Z A Z
2857	20					36	_cc($a1) . _cc($a2,$Z2), # a2-
2858							_cc($a1+1,$z1-1) . _cc($A2,$Z2), # -
2859							_cc( $z1) . _cc($A2,$z2), # -z2
2860							);
2861							}
2862							}
2863							elsif ($length == 3) {
2864	151					386	my($a1,$a2,$a3) = unpack 'CCC', $_[0];
2865	151					235	my($z1,$z2,$z3) = unpack 'CCC', $_[1];
2866	151					211	my($A1,$A2,$A3) = unpack 'CCC', $_[2];
2867	151					211	my($Z1,$Z2,$Z3) = unpack 'CCC', $_[3];
2868
2869	151	100				243	if ($a1 == $z1) {
		50
2870	131	100				220	if ($a2 == $z2) {
		50
2871							return (
2872							# 11111111 22222222 333333333333
2873							# A A A Z
2874	111					268	_cc($a1) . _cc($a2) . _cc($a3,$z3), # a3-z3
2875							);
2876							}
2877							elsif (($a2+1) == $z2) {
2878							return (
2879							# 11111111 22222222222 333333333333
2880							# A A Z A Z
2881	0					0	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2882							_cc($a1) . _cc( $z2) . _cc($A3,$z3), # -z3
2883							);
2884							}
2885							else {
2886							return (
2887							# 11111111 2222222222222222 333333333333
2888							# A A Z A Z
2889	20					24	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2890							_cc($a1) . _cc($a2+1,$z2-1) . _cc($A3,$Z3), # -
2891							_cc($a1) . _cc( $z2) . _cc($A3,$z3), # -z3
2892							);
2893							}
2894							}
2895							elsif (($a1+1) == $z1) {
2896							return (
2897							# 11111111111 22222222222222 333333333333
2898							# A Z A Z A Z
2899	0					0	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2900							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3), # -
2901							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3), # -
2902							_cc( $z1) . _cc( $z2) . _cc($A3,$z3), # -z3
2903							);
2904							}
2905							else {
2906							return (
2907							# 1111111111111111 22222222222222 333333333333
2908							# A Z A Z A Z
2909	20					26	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2910							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3), # -
2911							_cc($a1+1,$z1-1) . _cc($A2,$Z2) . _cc($A3,$Z3), # -
2912							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3), # -
2913							_cc( $z1) . _cc( $z2) . _cc($A3,$z3), # -z3
2914							);
2915							}
2916							}
2917							elsif ($length == 4) {
2918	0					0	my($a1,$a2,$a3,$a4) = unpack 'CCCC', $_[0];
2919	0					0	my($z1,$z2,$z3,$z4) = unpack 'CCCC', $_[1];
2920	0					0	my($A1,$A2,$A3,$A4) = unpack 'CCCC', $_[0];
2921	0					0	my($Z1,$Z2,$Z3,$Z4) = unpack 'CCCC', $_[1];
2922
2923	0	0				0	if ($a1 == $z1) {
		0
2924	0	0				0	if ($a2 == $z2) {
		0
2925	0	0				0	if ($a3 == $z3) {
		0
2926							return (
2927							# 11111111 22222222 33333333 444444444444
2928							# A A A A Z
2929	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$z4), # a4-z4
2930							);
2931							}
2932							elsif (($a3+1) == $z3) {
2933							return (
2934							# 11111111 22222222 33333333333 444444444444
2935							# A A A Z A Z
2936	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2937							_cc($a1) . _cc($a2) . _cc( $z3) . _cc($A4,$z4), # -z4
2938							);
2939							}
2940							else {
2941							return (
2942							# 11111111 22222222 3333333333333333 444444444444
2943							# A A A Z A Z
2944	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2945							_cc($a1) . _cc($a2) . _cc($a3+1,$z3-1) . _cc($A4,$Z4), # -
2946							_cc($a1) . _cc($a2) . _cc( $z3) . _cc($A4,$z4), # -z4
2947							);
2948							}
2949							}
2950							elsif (($a2+1) == $z2) {
2951							return (
2952							# 11111111 22222222222 33333333333333 444444444444
2953							# A A Z A Z A Z
2954	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2955							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2956							_cc($a1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2957							_cc($a1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2958							);
2959							}
2960							else {
2961							return (
2962							# 11111111 2222222222222222 33333333333333 444444444444
2963							# A A Z A Z A Z
2964	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2965							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2966							_cc($a1) . _cc($a2+1,$z2-1) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2967							_cc($a1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2968							_cc($a1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2969							);
2970							}
2971							}
2972							elsif (($a1+1) == $z1) {
2973							return (
2974							# 11111111111 22222222222222 33333333333333 444444444444
2975							# A Z A Z A Z A Z
2976	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2977							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2978							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2979							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2980							_cc( $z1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2981							_cc( $z1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2982							);
2983							}
2984							else {
2985							return (
2986							# 1111111111111111 22222222222222 33333333333333 444444444444
2987							# A Z A Z A Z A Z
2988	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2989							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2990							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2991							_cc($a1+1,$z1-1) . _cc($A2,$Z2) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2992							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2993							_cc( $z1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2994							_cc( $z1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2995							);
2996							}
2997							}
2998							else {
2999	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).\n";
3000							}
3001							}
3002
3003							#
3004							# UTF-8 range regexp
3005							#
3006							sub _range_regexp {
3007	537			537		669	my($length,$first,$last) = @_;
3008
3009	537					579	my @range_regexp = ();
3010	537	50				1241	if (not exists $range_tr{$length}) {
3011	0					0	return @range_regexp;
3012							}
3013
3014	537					488	my @ranges = @{ $range_tr{$length} };
	537					1231
3015	537					1535	while (my @range = splice(@ranges,0,$length)) {
3016	1316					1091	my $min = '';
3017	1316					896	my $max = '';
3018	1316					1914	for (my $i=0; $i < $length; $i++) {
3019	2384					3584	$min .= pack 'C', $range[$i][0];
3020	2384					3843	$max .= pack 'C', $range[$i][-1];
3021							}
3022
3023							# min___max
3024							# FIRST_____________LAST
3025							# (nothing)
3026
3027	1316	100	66			12463	if ($max lt $first) {
		100	100
		50	33
		100	100
		100	66
		100	66
		50	66
3028							}
3029
3030							# **********
3031							# min_________max
3032							# FIRST_____________LAST
3033							# **********
3034
3035							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
3036	28					49	push @range_regexp, _octets($length,$first,$max,$min,$max);
3037							}
3038
3039							# **********************
3040							# min________________max
3041							# FIRST_____________LAST
3042							# **********************
3043
3044							elsif (($min eq $first) and ($max eq $last)) {
3045	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
3046							}
3047
3048							# *********
3049							# min___max
3050							# FIRST_____________LAST
3051							# *********
3052
3053							elsif (($first le $min) and ($max le $last)) {
3054	60					75	push @range_regexp, _octets($length,$min,$max,$min,$max);
3055							}
3056
3057							# **********************
3058							# min__________________________max
3059							# FIRST_____________LAST
3060							# **********************
3061
3062							elsif (($min le $first) and ($last le $max)) {
3063	469					903	push @range_regexp, _octets($length,$first,$last,$min,$max);
3064							}
3065
3066							# *********
3067							# min________max
3068							# FIRST_____________LAST
3069							# *********
3070
3071							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
3072	20					23	push @range_regexp, _octets($length,$min,$last,$min,$max);
3073							}
3074
3075							# min___max
3076							# FIRST_____________LAST
3077							# (nothing)
3078
3079							elsif ($last lt $min) {
3080							}
3081
3082							else {
3083	0					0	die __FILE__, ": subroutine _range_regexp panic.\n";
3084							}
3085							}
3086
3087	537					1022	return @range_regexp;
3088							}
3089
3090							#
3091							# UTF-8 open character list for qr and not qr
3092							#
3093							sub _charlist {
3094
3095	770			770		1003	my $modifier = pop @_;
3096	770					1270	my @char = @_;
3097
3098	770	100				1631	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
3099
3100							# unescape character
3101	770					1976	for (my $i=0; $i <= $#char; $i++) {
3102
3103							# escape - to ...
3104	2660	100	100			21112	if ($char[$i] eq '-') {
		50
		50
		50
		50
		50
		100
		50
		100
		100
		100
		100
3105	522	100	100			1928	if ((0 < $i) and ($i < $#char)) {
3106	497					918	$char[$i] = '...';
3107							}
3108							}
3109
3110							# octal escape sequence
3111							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
3112	0					0	$char[$i] = octchr($1);
3113							}
3114
3115							# hexadecimal escape sequence
3116							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
3117	0					0	$char[$i] = hexchr($1);
3118							}
3119
3120							# \b{...} --> b\{...}
3121							# \B{...} --> B\{...}
3122							# \N{CHARNAME} --> N\{CHARNAME}
3123							# \p{PROPERTY} --> p\{PROPERTY}
3124							# \P{PROPERTY} --> P\{PROPERTY}
3125							elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
3126	0					0	$char[$i] = $1 . '\\' . $2;
3127							}
3128
3129							# \p, \P, \X --> p, P, X
3130							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
3131	0					0	$char[$i] = $1;
3132							}
3133
3134							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
3135	0					0	$char[$i] = CORE::chr oct $1;
3136							}
3137							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
3138	206					794	$char[$i] = CORE::chr hex $1;
3139							}
3140							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
3141	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
3142							}
3143							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
3144							$char[$i] = {
3145							'\0' => "\0",
3146							'\n' => "\n",
3147							'\r' => "\r",
3148							'\t' => "\t",
3149							'\f' => "\f",
3150							'\b' => "\x08", # \b means backspace in character class
3151							'\a' => "\a",
3152							'\e' => "\e",
3153							'\d' => '[0-9]',
3154
3155							# Vertical tabs are now whitespace
3156							# \s in a regex now matches a vertical tab in all circumstances.
3157							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
3158							# \t \n \v \f \r space
3159							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
3160							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
3161							'\s' => '\s',
3162
3163							'\w' => '[0-9A-Z_a-z]',
3164							'\D' => '${Eutf2::eD}',
3165							'\S' => '${Eutf2::eS}',
3166							'\W' => '${Eutf2::eW}',
3167
3168							'\H' => '${Eutf2::eH}',
3169							'\V' => '${Eutf2::eV}',
3170							'\h' => '[\x09\x20]',
3171							'\v' => '[\x0A\x0B\x0C\x0D]',
3172							'\R' => '${Eutf2::eR}',
3173
3174	33					440	}->{$1};
3175							}
3176
3177							# POSIX-style character classes
3178							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
3179							$char[$i] = {
3180
3181							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
3182							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
3183							'[:^lower:]' => '${Eutf2::not_lower_i}',
3184							'[:^upper:]' => '${Eutf2::not_upper_i}',
3185
3186	8					55	}->{$1};
3187							}
3188							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
3189							$char[$i] = {
3190
3191							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
3192							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
3193							'[:ascii:]' => '[\x00-\x7F]',
3194							'[:blank:]' => '[\x09\x20]',
3195							'[:cntrl:]' => '[\x00-\x1F\x7F]',
3196							'[:digit:]' => '[\x30-\x39]',
3197							'[:graph:]' => '[\x21-\x7F]',
3198							'[:lower:]' => '[\x61-\x7A]',
3199							'[:print:]' => '[\x20-\x7F]',
3200							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
3201
3202							# P.174 POSIX-Style Character Classes
3203							# in Chapter 5: Pattern Matching
3204							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
3205
3206							# P.311 11.2.4 Character Classes and other Special Escapes
3207							# in Chapter 11: perlre: Perl regular expressions
3208							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
3209
3210							# P.210 POSIX-Style Character Classes
3211							# in Chapter 5: Pattern Matching
3212							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
3213
3214							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
3215
3216							'[:upper:]' => '[\x41-\x5A]',
3217							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
3218							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
3219							'[:^alnum:]' => '${Eutf2::not_alnum}',
3220							'[:^alpha:]' => '${Eutf2::not_alpha}',
3221							'[:^ascii:]' => '${Eutf2::not_ascii}',
3222							'[:^blank:]' => '${Eutf2::not_blank}',
3223							'[:^cntrl:]' => '${Eutf2::not_cntrl}',
3224							'[:^digit:]' => '${Eutf2::not_digit}',
3225							'[:^graph:]' => '${Eutf2::not_graph}',
3226							'[:^lower:]' => '${Eutf2::not_lower}',
3227							'[:^print:]' => '${Eutf2::not_print}',
3228							'[:^punct:]' => '${Eutf2::not_punct}',
3229							'[:^space:]' => '${Eutf2::not_space}',
3230							'[:^upper:]' => '${Eutf2::not_upper}',
3231							'[:^word:]' => '${Eutf2::not_word}',
3232							'[:^xdigit:]' => '${Eutf2::not_xdigit}',
3233
3234	70					1203	}->{$1};
3235							}
3236							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
3237	7					29	$char[$i] = $1;
3238							}
3239							}
3240
3241							# open character list
3242	770					960	my @singleoctet = ();
3243	770					720	my @multipleoctet = ();
3244	770					1566	for (my $i=0; $i <= $#char; ) {
3245
3246							# escaped -
3247	2163	100	100			8916	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		100
		100
		50
		50
		100
3248	497					410	$i += 1;
3249	497					815	next;
3250							}
3251
3252							# make range regexp
3253							elsif ($char[$i] eq '...') {
3254
3255							# range error
3256	497	50				1569	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		100
3257	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
3258							}
3259							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
3260	477	50				1101	if ($char[$i-1] gt $char[$i+1]) {
3261	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
3262							}
3263							}
3264
3265							# make range regexp per length
3266	497					1293	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
3267	537					608	my @regexp = ();
3268
3269							# is first and last
3270	537	100	100			2107	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		100	66
		100
		50
3271	477					1193	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
3272							}
3273
3274							# is first
3275							elsif ($length == CORE::length($char[$i-1])) {
3276	20					61	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
3277							}
3278
3279							# is inside in first and last
3280							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
3281	20					61	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
3282							}
3283
3284							# is last
3285							elsif ($length == CORE::length($char[$i+1])) {
3286	20					39	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
3287							}
3288
3289							else {
3290	0					0	die __FILE__, ": subroutine make_regexp panic.\n";
3291							}
3292
3293	537	100				822	if ($length == 1) {
3294	386					736	push @singleoctet, @regexp;
3295							}
3296							else {
3297	151					307	push @multipleoctet, @regexp;
3298							}
3299							}
3300
3301	497					983	$i += 2;
3302							}
3303
3304							# with /i modifier
3305							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
3306	764	100				897	if ($modifier =~ /i/oxms) {
3307	192					303	my $uc = Eutf2::uc($char[$i]);
3308	192					318	my $fc = Eutf2::fc($char[$i]);
3309	192	50				286	if ($uc ne $fc) {
3310	192	50				262	if (CORE::length($fc) == 1) {
3311	192					299	push @singleoctet, $uc, $fc;
3312							}
3313							else {
3314	0					0	push @singleoctet, $uc;
3315	0					0	push @multipleoctet, $fc;
3316							}
3317							}
3318							else {
3319	0					0	push @singleoctet, $char[$i];
3320							}
3321							}
3322							else {
3323	572					607	push @singleoctet, $char[$i];
3324							}
3325	764					1082	$i += 1;
3326							}
3327
3328							# single character of single octet code
3329							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
3330	0					0	push @singleoctet, "\t", "\x20";
3331	0					0	$i += 1;
3332							}
3333							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
3334	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
3335	0					0	$i += 1;
3336							}
3337							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
3338	2					4	push @singleoctet, $char[$i];
3339	2					5	$i += 1;
3340							}
3341
3342							# single character of multiple-octet code
3343							else {
3344	403					458	push @multipleoctet, $char[$i];
3345	403					606	$i += 1;
3346							}
3347							}
3348
3349							# quote metachar
3350	770					1414	for (@singleoctet) {
3351	1364	50				5858	if ($_ eq '...') {
		100
		100
		100
		100
3352	0					0	$_ = '-';
3353							}
3354							elsif (/\A \n \z/oxms) {
3355	8					18	$_ = '\n';
3356							}
3357							elsif (/\A \r \z/oxms) {
3358	8					14	$_ = '\r';
3359							}
3360							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
3361	1					5	$_ = sprintf('\x%02X', CORE::ord $1);
3362							}
3363							elsif (/\A [\x00-\xFF] \z/oxms) {
3364	939					1093	$_ = quotemeta $_;
3365							}
3366							}
3367
3368							# return character list
3369	770					2003	return \@singleoctet, \@multipleoctet;
3370							}
3371
3372							#
3373							# UTF-8 octal escape sequence
3374							#
3375							sub octchr {
3376	5			5	0	14	my($octdigit) = @_;
3377
3378	5					6	my @binary = ();
3379	5					15	for my $octal (split(//,$octdigit)) {
3380							push @binary, {
3381							'0' => '000',
3382							'1' => '001',
3383							'2' => '010',
3384							'3' => '011',
3385							'4' => '100',
3386							'5' => '101',
3387							'6' => '110',
3388							'7' => '111',
3389	50					148	}->{$octal};
3390							}
3391	5					14	my $binary = join '', @binary;
3392
3393							my $octchr = {
3394							# 1234567
3395							1 => pack('B*', "0000000$binary"),
3396							2 => pack('B*', "000000$binary"),
3397							3 => pack('B*', "00000$binary"),
3398							4 => pack('B*', "0000$binary"),
3399							5 => pack('B*', "000$binary"),
3400							6 => pack('B*', "00$binary"),
3401							7 => pack('B*', "0$binary"),
3402							0 => pack('B*', "$binary"),
3403
3404	5					57	}->{CORE::length($binary) % 8};
3405
3406	5					19	return $octchr;
3407							}
3408
3409							#
3410							# UTF-8 hexadecimal escape sequence
3411							#
3412							sub hexchr {
3413	5			5	0	29	my($hexdigit) = @_;
3414
3415							my $hexchr = {
3416							1 => pack('H*', "0$hexdigit"),
3417							0 => pack('H*', "$hexdigit"),
3418
3419	5					44	}->{CORE::length($_[0]) % 2};
3420
3421	5					16	return $hexchr;
3422							}
3423
3424							#
3425							# UTF-8 open character list for qr
3426							#
3427							sub charlist_qr {
3428
3429	531			531	0	800	my $modifier = pop @_;
3430	531					1059	my @char = @_;
3431
3432	531					1309	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
3433	531					900	my @singleoctet = @$singleoctet;
3434	531					671	my @multipleoctet = @$multipleoctet;
3435
3436							# return character list
3437	531	100				1068	if (scalar(@singleoctet) >= 1) {
3438
3439							# with /i modifier
3440	384	100				757	if ($modifier =~ m/i/oxms) {
3441	107					197	my %singleoctet_ignorecase = ();
3442	107					175	for (@singleoctet) {
3443	272		66			990	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
3444	80					334	for my $ord (hex($1) .. hex($2)) {
3445	1091					902	my $char = CORE::chr($ord);
3446	1091					1120	my $uc = Eutf2::uc($char);
3447	1091					1221	my $fc = Eutf2::fc($char);
3448	1091	100				1268	if ($uc eq $fc) {
3449	502					946	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
3450							}
3451							else {
3452	589	50				612	if (CORE::length($fc) == 1) {
3453	589					942	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
3454	589					1200	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
3455							}
3456							else {
3457	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
3458	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
3459							}
3460							}
3461							}
3462							}
3463	272	100				448	if ($_ ne '') {
3464	192					486	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
3465							}
3466							}
3467	107					139	my $i = 0;
3468	107					134	my @singleoctet_ignorecase = ();
3469	107					177	for my $ord (0 .. 255) {
3470	27392	100				24293	if (exists $singleoctet_ignorecase{$ord}) {
3471	1622					915	push @{$singleoctet_ignorecase[$i]}, $ord;
	1622					1972
3472							}
3473							else {
3474	25770					17031	$i++;
3475							}
3476							}
3477	107					197	@singleoctet = ();
3478	107					267	for my $range (@singleoctet_ignorecase) {
3479	11367	100				15537	if (ref $range) {
3480	214	50				213	if (scalar(@{$range}) == 1) {
	214	50				336
3481	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
3482							}
3483	214					271	elsif (scalar(@{$range}) == 2) {
3484	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
3485							}
3486							else {
3487	214					218	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	214					212
	214					1046
3488							}
3489							}
3490							}
3491							}
3492
3493	384					467	my $not_anchor = '';
3494	384					355	$not_anchor = '(?!(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF]))';
3495
3496	384					882	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
3497							}
3498	531	100				904	if (scalar(@multipleoctet) >= 2) {
3499	102					635	return '(?:' . join('\|', @multipleoctet) . ')';
3500							}
3501							else {
3502	429					1655	return $multipleoctet[0];
3503							}
3504							}
3505
3506							#
3507							# UTF-8 open character list for not qr
3508							#
3509							sub charlist_not_qr {
3510
3511	239			239	0	333	my $modifier = pop @_;
3512	239					423	my @char = @_;
3513
3514	239					489	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
3515	239					367	my @singleoctet = @$singleoctet;
3516	239					278	my @multipleoctet = @$multipleoctet;
3517
3518							# with /i modifier
3519	239	100				457	if ($modifier =~ m/i/oxms) {
3520	128					176	my %singleoctet_ignorecase = ();
3521	128					162	for (@singleoctet) {
3522	272		66			889	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
3523	80					255	for my $ord (hex($1) .. hex($2)) {
3524	1091					848	my $char = CORE::chr($ord);
3525	1091					1022	my $uc = Eutf2::uc($char);
3526	1091					1165	my $fc = Eutf2::fc($char);
3527	1091	100				1185	if ($uc eq $fc) {
3528	502					881	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
3529							}
3530							else {
3531	589	50				581	if (CORE::length($fc) == 1) {
3532	589					891	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
3533	589					1141	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
3534							}
3535							else {
3536	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
3537	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
3538							}
3539							}
3540							}
3541							}
3542	272	100				403	if ($_ ne '') {
3543	192					372	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
3544							}
3545							}
3546	128					112	my $i = 0;
3547	128					128	my @singleoctet_ignorecase = ();
3548	128					179	for my $ord (0 .. 255) {
3549	32768	100				28377	if (exists $singleoctet_ignorecase{$ord}) {
3550	1622					890	push @{$singleoctet_ignorecase[$i]}, $ord;
	1622					1863
3551							}
3552							else {
3553	31146					19930	$i++;
3554							}
3555							}
3556	128					196	@singleoctet = ();
3557	128					231	for my $range (@singleoctet_ignorecase) {
3558	11367	100				15004	if (ref $range) {
3559	214	50				137	if (scalar(@{$range}) == 1) {
	214	50				287
3560	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
3561							}
3562	214					240	elsif (scalar(@{$range}) == 2) {
3563	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
3564							}
3565							else {
3566	214					176	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	214					219
	214					917
3567							}
3568							}
3569							}
3570							}
3571
3572							# return character list
3573	239	100				432	if (scalar(@multipleoctet) >= 1) {
3574	114	100				167	if (scalar(@singleoctet) >= 1) {
3575
3576							# any character other than multiple-octet and single octet character class
3577	70					469	return '(?!' . join('\|', @multipleoctet) . ')(?:[^\x80-\xFF' . join('', @singleoctet) . ']\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])';
3578							}
3579							else {
3580
3581							# any character other than multiple-octet character class
3582	44					269	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
3583							}
3584							}
3585							else {
3586	125	50				209	if (scalar(@singleoctet) >= 1) {
3587
3588							# any character other than single octet character class
3589	125					708	return '(?:[^\x80-\xFF' . join('', @singleoctet) . ']\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])';
3590							}
3591							else {
3592
3593							# any character
3594	0					0	return "(?:$your_char)";
3595							}
3596							}
3597							}
3598
3599							#
3600							# open file in read mode
3601							#
3602							sub _open_r {
3603	604			604		1261	my(undef,$file) = @_;
3604	604					1563	$file =~ s#\A (\s) #./$1#oxms;
3605	604		33			44939	return CORE::eval(q{open($_[0],'<',$_[1])}) \|\|
3606							open($_[0],"< $file\0");
3607							}
3608
3609							#
3610							# open file in write mode
3611							#
3612							sub _open_w {
3613	0			0		0	my(undef,$file) = @_;
3614	0					0	$file =~ s#\A (\s) #./$1#oxms;
3615	0		0			0	return CORE::eval(q{open($_[0],'>',$_[1])}) \|\|
3616							open($_[0],"> $file\0");
3617							}
3618
3619							#
3620							# open file in append mode
3621							#
3622							sub _open_a {
3623	0			0		0	my(undef,$file) = @_;
3624	0					0	$file =~ s#\A (\s) #./$1#oxms;
3625	0		0			0	return CORE::eval(q{open($_[0],'>>',$_[1])}) \|\|
3626							open($_[0],">> $file\0");
3627							}
3628
3629							#
3630							# safe system
3631							#
3632							sub _systemx {
3633
3634							# P.707 29.2.33. exec
3635							# in Chapter 29: Functions
3636							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
3637							#
3638							# Be aware that in older releases of Perl, exec (and system) did not flush
3639							# your output buffer, so you needed to enable command buffering by setting $\|
3640							# on one or more filehandles to avoid lost output in the case of exec, or
3641							# misordererd output in the case of system. This situation was largely remedied
3642							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
3643
3644							# P.855 exec
3645							# in Chapter 27: Functions
3646							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
3647							#
3648							# In very old release of Perl (before v5.6), exec (and system) did not flush
3649							# your output buffer, so you needed to enable command buffering by setting $\|
3650							# on one or more filehandles to avoid lost output with exec or misordered
3651							# output with system.
3652
3653	302			302		1066	$\| = 1;
3654
3655							# P.565 23.1.2. Cleaning Up Your Environment
3656							# in Chapter 23: Security
3657							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
3658
3659							# P.656 Cleaning Up Your Environment
3660							# in Chapter 20: Security
3661							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
3662
3663							# local $ENV{'PATH'} = '.';
3664	302					2404	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
3665
3666							# P.707 29.2.33. exec
3667							# in Chapter 29: Functions
3668							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
3669							#
3670							# As we mentioned earlier, exec treats a discrete list of arguments as an
3671							# indication that it should bypass shell processing. However, there is one
3672							# place where you might still get tripped up. The exec call (and system, too)
3673							# will not distinguish between a single scalar argument and an array containing
3674							# only one element.
3675							#
3676							# @args = ("echo surprise"); # just one element in list
3677							# exec @args # still subject to shell escapes
3678							# or die "exec: $!"; # because @args == 1
3679							#
3680							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
3681							# first argument as the pathname, which forces the rest of the arguments to be
3682							# interpreted as a list, even if there is only one of them:
3683							#
3684							# exec { $args[0] } @args # safe even with one-argument list
3685							# or die "can't exec @args: $!";
3686
3687							# P.855 exec
3688							# in Chapter 27: Functions
3689							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
3690							#
3691							# As we mentioned earlier, exec treats a discrete list of arguments as a
3692							# directive to bypass shell processing. However, there is one place where
3693							# you might still get tripped up. The exec call (and system, too) cannot
3694							# distinguish between a single scalar argument and an array containing
3695							# only one element.
3696							#
3697							# @args = ("echo surprise"); # just one element in list
3698							# exec @args # still subject to shell escapes
3699							# \|\| die "exec: $!"; # because @args == 1
3700							#
3701							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
3702							# argument as the pathname, which forces the rest of the arguments to be
3703							# interpreted as a list, even if there is only one of them:
3704							#
3705							# exec { $args[0] } @args # safe even with one-argument list
3706							# \|\| die "can't exec @args: $!";
3707
3708	302					483	return CORE::system { $_[0] } @_; # safe even with one-argument list
	302					30996039
3709							}
3710
3711							#
3712							# UTF-8 order to character (with parameter)
3713							#
3714							sub Eutf2::chr(;$) {
3715
3716	0	0		0	0	0	my $c = @_ ? $_[0] : $_;
3717
3718	0	0				0	if ($c == 0x00) {
3719	0					0	return "\x00";
3720							}
3721							else {
3722	0					0	my @chr = ();
3723	0					0	while ($c > 0) {
3724	0					0	unshift @chr, ($c % 0x100);
3725	0					0	$c = int($c / 0x100);
3726							}
3727	0					0	return pack 'C*', @chr;
3728							}
3729							}
3730
3731							#
3732							# UTF-8 order to character (without parameter)
3733							#
3734							sub Eutf2::chr_() {
3735
3736	0			0	0	0	my $c = $_;
3737
3738	0	0				0	if ($c == 0x00) {
3739	0					0	return "\x00";
3740							}
3741							else {
3742	0					0	my @chr = ();
3743	0					0	while ($c > 0) {
3744	0					0	unshift @chr, ($c % 0x100);
3745	0					0	$c = int($c / 0x100);
3746							}
3747	0					0	return pack 'C*', @chr;
3748							}
3749							}
3750
3751							#
3752							# UTF-8 path globbing (with parameter)
3753							#
3754							sub Eutf2::glob($) {
3755
3756	0	0		0	0	0	if (wantarray) {
3757	0					0	my @glob = _DOS_like_glob(@_);
3758	0					0	for my $glob (@glob) {
3759	0					0	$glob =~ s{ \A (?:\./)+ }{}oxms;
3760							}
3761	0					0	return @glob;
3762							}
3763							else {
3764	0					0	my $glob = _DOS_like_glob(@_);
3765	0					0	$glob =~ s{ \A (?:\./)+ }{}oxms;
3766	0					0	return $glob;
3767							}
3768							}
3769
3770							#
3771							# UTF-8 path globbing (without parameter)
3772							#
3773							sub Eutf2::glob_() {
3774
3775	0	0		0	0	0	if (wantarray) {
3776	0					0	my @glob = _DOS_like_glob();
3777	0					0	for my $glob (@glob) {
3778	0					0	$glob =~ s{ \A (?:\./)+ }{}oxms;
3779							}
3780	0					0	return @glob;
3781							}
3782							else {
3783	0					0	my $glob = _DOS_like_glob();
3784	0					0	$glob =~ s{ \A (?:\./)+ }{}oxms;
3785	0					0	return $glob;
3786							}
3787							}
3788
3789							#
3790							# UTF-8 path globbing via File::DosGlob 1.10
3791							#
3792							# Often I confuse "_dosglob" and "_doglob".
3793							# So, I renamed "_dosglob" to "_DOS_like_glob".
3794							#
3795							my %iter;
3796							my %entries;
3797							sub _DOS_like_glob {
3798
3799							# context (keyed by second cxix argument provided by core)
3800	0			0		0	my($expr,$cxix) = @_;
3801
3802							# glob without args defaults to $_
3803	0	0				0	$expr = $_ if not defined $expr;
3804
3805							# represents the current user's home directory
3806							#
3807							# 7.3. Expanding Tildes in Filenames
3808							# in Chapter 7. File Access
3809							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
3810							#
3811							# and File::HomeDir, File::HomeDir::Windows module
3812
3813							# DOS-like system
3814	0	0				0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
3815	0					0	$expr =~ s{ \A ~ (?= [^/\\] ) }
3816	0					0	{ my_home_MSWin32() }oxmse;
3817							}
3818
3819							# UNIX-like system
3820							else {
3821	0					0	$expr =~ s{ \A ~ ( (?:[^\x80-\xFF/]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])* ) }
3822	0	0	0			0	{ $1 ? (CORE::eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
3823							}
3824
3825							# assume global context if not provided one
3826	0	0				0	$cxix = '_G_' if not defined $cxix;
3827	0	0				0	$iter{$cxix} = 0 if not exists $iter{$cxix};
3828
3829							# if we're just beginning, do it all first
3830	0	0				0	if ($iter{$cxix} == 0) {
3831	0					0	$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
3832							}
3833
3834							# chuck it all out, quick or slow
3835	0	0				0	if (wantarray) {
3836	0					0	delete $iter{$cxix};
3837	0					0	return @{delete $entries{$cxix}};
	0					0
3838							}
3839							else {
3840	0	0				0	if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0					0
3841	0					0	return shift @{$entries{$cxix}};
	0					0
3842							}
3843							else {
3844							# return undef for EOL
3845	0					0	delete $iter{$cxix};
3846	0					0	delete $entries{$cxix};
3847	0					0	return undef;
3848							}
3849							}
3850							}
3851
3852							#
3853							# UTF-8 path globbing subroutine
3854							#
3855							sub _do_glob {
3856
3857	0			0		0	my($cond,@expr) = @_;
3858	0					0	my @glob = ();
3859	0					0	my $fix_drive_relative_paths = 0;
3860
3861							OUTER:
3862	0					0	for my $expr (@expr) {
3863	0	0				0	next OUTER if not defined $expr;
3864	0	0				0	next OUTER if $expr eq '';
3865
3866	0					0	my @matched = ();
3867	0					0	my @globdir = ();
3868	0					0	my $head = '.';
3869	0					0	my $pathsep = '/';
3870	0					0	my $tail;
3871
3872							# if argument is within quotes strip em and do no globbing
3873	0	0				0	if ($expr =~ /\A " ((?:$q_char)*?) " \z/oxms) {
3874	0					0	$expr = $1;
3875	0	0				0	if ($cond eq 'd') {
3876	0	0				0	if (-d $expr) {
3877	0					0	push @glob, $expr;
3878							}
3879							}
3880							else {
3881	0	0				0	if (-e $expr) {
3882	0					0	push @glob, $expr;
3883							}
3884							}
3885	0					0	next OUTER;
3886							}
3887
3888							# wildcards with a drive prefix such as h:*.pm must be changed
3889							# to h:./*.pm to expand correctly
3890	0	0				0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
3891	0	0				0	if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^\x80-\xFF/\\]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]) #$1./$2#oxms) {
3892	0					0	$fix_drive_relative_paths = 1;
3893							}
3894							}
3895
3896	0	0				0	if (($head, $tail) = _parse_path($expr,$pathsep)) {
3897	0	0				0	if ($tail eq '') {
3898	0					0	push @glob, $expr;
3899	0					0	next OUTER;
3900							}
3901	0	0				0	if ($head =~ / \A (?:$q_char)? [?] /oxms) {
3902	0	0				0	if (@globdir = _do_glob('d', $head)) {
3903	0					0	push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0					0
3904	0					0	next OUTER;
3905							}
3906							}
3907	0	0	0			0	if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
3908	0					0	$head .= $pathsep;
3909							}
3910	0					0	$expr = $tail;
3911							}
3912
3913							# If file component has no wildcards, we can avoid opendir
3914	0	0				0	if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
3915	0	0				0	if ($head eq '.') {
3916	0					0	$head = '';
3917							}
3918	0	0	0			0	if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
3919	0					0	$head .= $pathsep;
3920							}
3921	0					0	$head .= $expr;
3922	0	0				0	if ($cond eq 'd') {
3923	0	0				0	if (-d $head) {
3924	0					0	push @glob, $head;
3925							}
3926							}
3927							else {
3928	0	0				0	if (-e $head) {
3929	0					0	push @glob, $head;
3930							}
3931							}
3932	0					0	next OUTER;
3933							}
3934	0	0				0	opendir(*DIR, $head) or next OUTER;
3935	0					0	my @leaf = readdir DIR;
3936	0					0	closedir DIR;
3937
3938	0	0				0	if ($head eq '.') {
3939	0					0	$head = '';
3940							}
3941	0	0	0			0	if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
3942	0					0	$head .= $pathsep;
3943							}
3944
3945	0					0	my $pattern = '';
3946	0					0	while ($expr =~ / \G ($q_char) /oxgc) {
3947	0					0	my $char = $1;
3948
3949							# 6.9. Matching Shell Globs as Regular Expressions
3950							# in Chapter 6. Pattern Matching
3951							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
3952							# (and so on)
3953
3954	0	0				0	if ($char eq '*') {
		0
		0
3955	0					0	$pattern .= "(?:$your_char)*",
3956							}
3957							elsif ($char eq '?') {
3958	0					0	$pattern .= "(?:$your_char)?", # DOS style
3959							# $pattern .= "(?:$your_char)", # UNIX style
3960							}
3961							elsif ((my $fc = Eutf2::fc($char)) ne $char) {
3962	0					0	$pattern .= $fc;
3963							}
3964							else {
3965	0					0	$pattern .= quotemeta $char;
3966							}
3967							}
3968	0			0		0	my $matchsub = sub { Eutf2::fc($_[0]) =~ /\A $pattern \z/xms };
	0					0
3969
3970							# if ($@) {
3971							# print STDERR "$0: $@\n";
3972							# next OUTER;
3973							# }
3974
3975							INNER:
3976	0					0	for my $leaf (@leaf) {
3977	0	0	0			0	if ($leaf eq '.' or $leaf eq '..') {
3978	0					0	next INNER;
3979							}
3980	0	0	0			0	if ($cond eq 'd' and not -d "$head$leaf") {
3981	0					0	next INNER;
3982							}
3983
3984	0	0				0	if (&$matchsub($leaf)) {
3985	0					0	push @matched, "$head$leaf";
3986	0					0	next INNER;
3987							}
3988
3989							# [DOS compatibility special case]
3990							# Failed, add a trailing dot and try again, but only...
3991
3992	0	0	0			0	if (Eutf2::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
3993							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
3994							Eutf2::index($pattern,'\\.') != -1 # pattern has a dot.
3995							) {
3996	0	0				0	if (&$matchsub("$leaf.")) {
3997	0					0	push @matched, "$head$leaf";
3998	0					0	next INNER;
3999							}
4000							}
4001							}
4002	0	0				0	if (@matched) {
4003	0					0	push @glob, @matched;
4004							}
4005							}
4006	0	0				0	if ($fix_drive_relative_paths) {
4007	0					0	for my $glob (@glob) {
4008	0					0	$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
4009							}
4010							}
4011	0					0	return @glob;
4012							}
4013
4014							#
4015							# UTF-8 parse line
4016							#
4017							sub _parse_line {
4018
4019	0			0		0	my($line) = @_;
4020
4021	0					0	$line .= ' ';
4022	0					0	my @piece = ();
4023	0					0	while ($line =~ /
4024							" ( (?>(?: [^\x80-\xFF"] \|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] )* ) ) " (?>\s+) \|
4025							( (?>(?: [^\x80-\xFF"\s]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] )* ) ) (?>\s+)
4026							/oxmsg
4027							) {
4028	0	0				0	push @piece, defined($1) ? $1 : $2;
4029							}
4030	0					0	return @piece;
4031							}
4032
4033							#
4034							# UTF-8 parse path
4035							#
4036							sub _parse_path {
4037
4038	0			0		0	my($path,$pathsep) = @_;
4039
4040	0					0	$path .= '/';
4041	0					0	my @subpath = ();
4042	0					0	while ($path =~ /
4043							((?: [^\x80-\xFF\/\\]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] )+?) [\/\\]
4044							/oxmsg
4045							) {
4046	0					0	push @subpath, $1;
4047							}
4048
4049	0					0	my $tail = pop @subpath;
4050	0					0	my $head = join $pathsep, @subpath;
4051	0					0	return $head, $tail;
4052							}
4053
4054							#
4055							# via File::HomeDir::Windows 1.00
4056							#
4057							sub my_home_MSWin32 {
4058
4059							# A lot of unix people and unix-derived tools rely on
4060							# the ability to overload HOME. We will support it too
4061							# so that they can replace raw HOME calls with File::HomeDir.
4062	0	0	0	0	0	0	if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
4063	0					0	return $ENV{'HOME'};
4064							}
4065
4066							# Do we have a user profile?
4067							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
4068	0					0	return $ENV{'USERPROFILE'};
4069							}
4070
4071							# Some Windows use something like $ENV{'HOME'}
4072							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
4073	0					0	return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
4074							}
4075
4076	0					0	return undef;
4077							}
4078
4079							#
4080							# via File::HomeDir::Unix 1.00
4081							#
4082							sub my_home {
4083	0			0	0	0	my $home;
4084
4085	0	0	0			0	if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
4086	0					0	$home = $ENV{'HOME'};
4087							}
4088
4089							# This is from the original code, but I'm guessing
4090							# it means "login directory" and exists on some Unixes.
4091							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
4092	0					0	$home = $ENV{'LOGDIR'};
4093							}
4094
4095							### More-desperate methods
4096
4097							# Light desperation on any (Unixish) platform
4098							else {
4099	0					0	$home = CORE::eval q{ (getpwuid($<))[7] };
4100							}
4101
4102							# On Unix in general, a non-existant home means "no home"
4103							# For example, "nobody"-like users might use /nonexistant
4104	0	0	0			0	if (defined $home and ! -d($home)) {
4105	0					0	$home = undef;
4106							}
4107	0					0	return $home;
4108							}
4109
4110							#
4111							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
4112							#
4113							sub Eutf2::PREMATCH {
4114	0			0	0	0	return $`;
4115							}
4116
4117							#
4118							# ${^MATCH}, $MATCH, $& the string that matched
4119							#
4120							sub Eutf2::MATCH {
4121	0			0	0	0	return $&;
4122							}
4123
4124							#
4125							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
4126							#
4127							sub Eutf2::POSTMATCH {
4128	0			0	0	0	return $';
4129							}
4130
4131							#
4132							# UTF-8 character to order (with parameter)
4133							#
4134							sub UTF2::ord(;$) {
4135
4136	0	0		0	1	0	local $_ = shift if @_;
4137
4138	0	0				0	if (/\A ($q_char) /oxms) {
4139	0					0	my @ord = unpack 'C*', $1;
4140	0					0	my $ord = 0;
4141	0					0	while (my $o = shift @ord) {
4142	0					0	$ord = $ord * 0x100 + $o;
4143							}
4144	0					0	return $ord;
4145							}
4146							else {
4147	0					0	return CORE::ord $_;
4148							}
4149							}
4150
4151							#
4152							# UTF-8 character to order (without parameter)
4153							#
4154							sub UTF2::ord_() {
4155
4156	0	0		0	0	0	if (/\A ($q_char) /oxms) {
4157	0					0	my @ord = unpack 'C*', $1;
4158	0					0	my $ord = 0;
4159	0					0	while (my $o = shift @ord) {
4160	0					0	$ord = $ord * 0x100 + $o;
4161							}
4162	0					0	return $ord;
4163							}
4164							else {
4165	0					0	return CORE::ord $_;
4166							}
4167							}
4168
4169							#
4170							# UTF-8 reverse
4171							#
4172							sub UTF2::reverse(@) {
4173
4174	0	0		0	0	0	if (wantarray) {
4175	0					0	return CORE::reverse @_;
4176							}
4177							else {
4178
4179							# One of us once cornered Larry in an elevator and asked him what
4180							# problem he was solving with this, but he looked as far off into
4181							# the distance as he could in an elevator and said, "It seemed like
4182							# a good idea at the time."
4183
4184	0					0	return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
4185							}
4186							}
4187
4188							#
4189							# UTF-8 getc (with parameter, without parameter)
4190							#
4191							sub UTF2::getc(;*@) {
4192
4193	0			0	0	0	my($package) = caller;
4194	0	0				0	my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
4195	0	0	0			0	croak 'Too many arguments for UTF2::getc' if @_ and not wantarray;
4196
4197	0					0	my @length = sort { $a <=> $b } keys %range_tr;
	0					0
4198	0					0	my $getc = '';
4199	0					0	for my $length ($length[0] .. $length[-1]) {
4200	0					0	$getc .= CORE::getc($fh);
4201	0	0				0	if (exists $range_tr{CORE::length($getc)}) {
4202	0	0				0	if ($getc =~ /\A ${Eutf2::dot_s} \z/oxms) {
4203	0	0				0	return wantarray ? ($getc,@_) : $getc;
4204							}
4205							}
4206							}
4207	0	0				0	return wantarray ? ($getc,@_) : $getc;
4208							}
4209
4210							#
4211							# UTF-8 length by character
4212							#
4213							sub UTF2::length(;$) {
4214
4215	0	0		0	1	0	local $_ = shift if @_;
4216
4217	0					0	local @_ = /\G ($q_char) /oxmsg;
4218	0					0	return scalar @_;
4219							}
4220
4221							#
4222							# UTF-8 substr by character
4223							#
4224							BEGIN {
4225
4226							# P.232 The lvalue Attribute
4227							# in Chapter 6: Subroutines
4228							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
4229
4230							# P.336 The lvalue Attribute
4231							# in Chapter 7: Subroutines
4232							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
4233
4234							# P.144 8.4 Lvalue subroutines
4235							# in Chapter 8: perlsub: Perl subroutines
4236							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
4237
4238	302	50	0	302	1	195243	CORE::eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0		0
	0	0				0
	0	0				0
	0	0				0
	0	0				0
	0	0				0
	0	0				0
	0	0				0
	0	0				0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
	0					0
4239							# vv----------------------*******
4240							sub UTF2::substr($$;$$) %s {
4241
4242							my @char = $_[0] =~ /\G (?>$q_char) /oxmsg;
4243
4244							# If the substring is beyond either end of the string, substr() returns the undefined
4245							# value and produces a warning. When used as an lvalue, specifying a substring that
4246							# is entirely outside the string raises an exception.
4247							# http://perldoc.perl.org/functions/substr.html
4248
4249							# A return with no argument returns the scalar value undef in scalar context,
4250							# an empty list () in list context, and (naturally) nothing at all in void
4251							# context.
4252
4253							my $offset = $_[1];
4254							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
4255							return;
4256							}
4257
4258							# substr($string,$offset,$length,$replacement)
4259							if (@_ == 4) {
4260							my(undef,undef,$length,$replacement) = @_;
4261							my $substr = join '', splice(@char, $offset, $length, $replacement);
4262							$_[0] = join '', @char;
4263
4264							# return $substr; this doesn't work, don't say "return"
4265							$substr;
4266							}
4267
4268							# substr($string,$offset,$length)
4269							elsif (@_ == 3) {
4270							my(undef,undef,$length) = @_;
4271							my $octet_offset = 0;
4272							my $octet_length = 0;
4273							if ($offset == 0) {
4274							$octet_offset = 0;
4275							}
4276							elsif ($offset > 0) {
4277							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
4278							}
4279							else {
4280							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
4281							}
4282							if ($length == 0) {
4283							$octet_length = 0;
4284							}
4285							elsif ($length > 0) {
4286							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
4287							}
4288							else {
4289							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
4290							}
4291							CORE::substr($_[0], $octet_offset, $octet_length);
4292							}
4293
4294							# substr($string,$offset)
4295							else {
4296							my $octet_offset = 0;
4297							if ($offset == 0) {
4298							$octet_offset = 0;
4299							}
4300							elsif ($offset > 0) {
4301							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
4302							}
4303							else {
4304							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
4305							}
4306							CORE::substr($_[0], $octet_offset);
4307							}
4308							}
4309							END
4310							}
4311
4312							#
4313							# UTF-8 index by character
4314							#
4315							sub UTF2::index($$;$) {
4316
4317	0			0	1	0	my $index;
4318	0	0				0	if (@_ == 3) {
4319	0					0	$index = Eutf2::index($_[0], $_[1], CORE::length(UTF2::substr($_[0], 0, $_[2])));
4320							}
4321							else {
4322	0					0	$index = Eutf2::index($_[0], $_[1]);
4323							}
4324
4325	0	0				0	if ($index == -1) {
4326	0					0	return -1;
4327							}
4328							else {
4329	0					0	return UTF2::length(CORE::substr $_[0], 0, $index);
4330							}
4331							}
4332
4333							#
4334							# UTF-8 rindex by character
4335							#
4336							sub UTF2::rindex($$;$) {
4337
4338	0			0	1	0	my $rindex;
4339	0	0				0	if (@_ == 3) {
4340	0					0	$rindex = Eutf2::rindex($_[0], $_[1], CORE::length(UTF2::substr($_[0], 0, $_[2])));
4341							}
4342							else {
4343	0					0	$rindex = Eutf2::rindex($_[0], $_[1]);
4344							}
4345
4346	0	0				0	if ($rindex == -1) {
4347	0					0	return -1;
4348							}
4349							else {
4350	0					0	return UTF2::length(CORE::substr $_[0], 0, $rindex);
4351							}
4352							}
4353
4354							# when 'm//', '/' means regexp match 'm//' and '?' means regexp match '??'
4355							# when 'div', '/' means division operator and '?' means conditional operator (condition ? then : else)
4356	302			302		20581	BEGIN { CORE::eval q{ use vars qw($slash) } } $slash = 'm//';
	302			302		1931
	302					448
	302					19690
4357
4358							# ord() to ord() or UTF2::ord()
4359	302			302		15828	BEGIN { CORE::eval q{ use vars qw($function_ord) } } $function_ord = 'ord';
	302			302		1215
	302					447
	302					16367
4360
4361							# ord to ord or UTF2::ord_
4362	302			302		15450	BEGIN { CORE::eval q{ use vars qw($function_ord_) } } $function_ord_ = 'ord';
	302			302		1196
	302					429
	302					16517
4363
4364							# reverse to reverse or UTF2::reverse
4365	302			302		14869	BEGIN { CORE::eval q{ use vars qw($function_reverse) } } $function_reverse = 'reverse';
	302			302		1141
	302					398
	302					16885
4366
4367							# getc to getc or UTF2::getc
4368	302			302		14074	BEGIN { CORE::eval q{ use vars qw($function_getc) } } $function_getc = 'getc';
	302			302		1380
	302					409
	302					17966
4369
4370							# P.1023 Appendix W.9 Multibyte Anchoring
4371							# of ISBN 1-56592-224-7 CJKV Information Processing
4372
4373							my $anchor = '';
4374
4375	302			302		14904	BEGIN { CORE::eval q{ use vars qw($nest) } }
	302			302		1163
	302					397
	302					15054510
4376
4377							# regexp of nested parens in qqXX
4378
4379							# P.340 Matching Nested Constructs with Embedded Code
4380							# in Chapter 7: Perl
4381							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4382
4383							my $qq_paren = qr{(?{local $nest=0}) (?>(?:
4384							[^\x80-\xFF\\()] \|
4385							\( (?{$nest++}) \|
4386							\) (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4387							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4388							\\ [^\x80-\xFFc] \|
4389							\\c[\x40-\x5F] \|
4390							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4391							[\x00-\xFF]
4392							}xms;
4393
4394							my $qq_brace = qr{(?{local $nest=0}) (?>(?:
4395							[^\x80-\xFF\\{}] \|
4396							\{ (?{$nest++}) \|
4397							\} (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4398							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4399							\\ [^\x80-\xFFc] \|
4400							\\c[\x40-\x5F] \|
4401							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4402							[\x00-\xFF]
4403							}xms;
4404
4405							my $qq_bracket = qr{(?{local $nest=0}) (?>(?:
4406							[^\x80-\xFF\\\[\]] \|
4407							\[ (?{$nest++}) \|
4408							\] (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4409							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4410							\\ [^\x80-\xFFc] \|
4411							\\c[\x40-\x5F] \|
4412							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4413							[\x00-\xFF]
4414							}xms;
4415
4416							my $qq_angle = qr{(?{local $nest=0}) (?>(?:
4417							[^\x80-\xFF\\<>] \|
4418							\< (?{$nest++}) \|
4419							\> (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4420							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4421							\\ [^\x80-\xFFc] \|
4422							\\c[\x40-\x5F] \|
4423							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4424							[\x00-\xFF]
4425							}xms;
4426
4427							my $qq_scalar = qr{(?: \{ (?:$qq_brace)*? \} \|
4428							(?: ::)? (?:
4429							(?> [a-zA-Z_][a-zA-Z_0-9]* (?: ::[a-zA-Z_][a-zA-Z_0-9]) )
4430							(?>(?: \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| \{ (?:$qq_brace)? \} )*)
4431							(?>(?: (?: -> )? (?: [\$\@\%\&\]\ \| \$\#\* \| [\@\%]? \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| [\@\%\]? \{ (?:$qq_brace)? \} ) ))
4432							))
4433							}xms;
4434
4435							my $qq_variable = qr{(?: \{ (?:$qq_brace)*? \} \|
4436							(?: ::)? (?:
4437							(?>[0-9]+) \|
4438							[^\x80-\xFFa-zA-Z_0-9\[\]] \|
4439							^[A-Z] \|
4440							(?> [a-zA-Z_][a-zA-Z_0-9]* (?: ::[a-zA-Z_][a-zA-Z_0-9]) )
4441							(?>(?: \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| \{ (?:$qq_brace)? \} )*)
4442							(?>(?: (?: -> )? (?: [\$\@\%\&\]\ \| \$\#\* \| [\@\%]? \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| [\@\%\]? \{ (?:$qq_brace)? \} ) ))
4443							))
4444							}xms;
4445
4446							my $qq_substr = qr{(?> Char::substr \| UTF2::substr \| CORE::substr \| substr ) (?>\s*) $ $qq_paren $
4447							}xms;
4448
4449							# regexp of nested parens in qXX
4450							my $q_paren = qr{(?{local $nest=0}) (?>(?:
4451							[^\x80-\xFF()] \|
4452							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4453							\( (?{$nest++}) \|
4454							\) (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4455							[\x00-\xFF]
4456							}xms;
4457
4458							my $q_brace = qr{(?{local $nest=0}) (?>(?:
4459							[^\x80-\xFF\{\}] \|
4460							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4461							\{ (?{$nest++}) \|
4462							\} (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4463							[\x00-\xFF]
4464							}xms;
4465
4466							my $q_bracket = qr{(?{local $nest=0}) (?>(?:
4467							[^\x80-\xFF\[\]] \|
4468							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4469							\[ (?{$nest++}) \|
4470							\] (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4471							[\x00-\xFF]
4472							}xms;
4473
4474							my $q_angle = qr{(?{local $nest=0}) (?>(?:
4475							[^\x80-\xFF<>] \|
4476							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4477							\< (?{$nest++}) \|
4478							\> (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4479							[\x00-\xFF]
4480							}xms;
4481
4482							my $matched = '';
4483							my $s_matched = '';
4484
4485							my $tr_variable = ''; # variable of tr///
4486							my $sub_variable = ''; # variable of s///
4487							my $bind_operator = ''; # =~ or !~
4488
4489							my @heredoc = (); # here document
4490							my @heredoc_delimiter = ();
4491							my $here_script = ''; # here script
4492
4493							#
4494							# escape UTF-8 script
4495							#
4496							sub UTF2::escape(;$) {
4497	302	50		302	0	817	local($_) = $_[0] if @_;
4498
4499							# P.359 The Study Function
4500							# in Chapter 7: Perl
4501							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4502
4503	302					468	study $_; # Yes, I studied study yesterday.
4504
4505							# while all script
4506
4507							# 6.14. Matching from Where the Last Pattern Left Off
4508							# in Chapter 6. Pattern Matching
4509							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
4510							# (and so on)
4511
4512							# one member of Tag-team
4513							#
4514							# P.128 Start of match (or end of previous match): \G
4515							# P.130 Advanced Use of \G with Perl
4516							# in Chapter 3: Overview of Regular Expression Features and Flavors
4517							# P.255 Use leading anchors
4518							# P.256 Expose ^ and \G at the front expressions
4519							# in Chapter 6: Crafting an Efficient Expression
4520							# P.315 "Tag-team" matching with /gc
4521							# in Chapter 7: Perl
4522							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4523
4524	302					417	my $e_script = '';
4525	302					1068	while (not /\G \z/oxgc) { # member
4526	131378					137008	$e_script .= UTF2::escape_token();
4527							}
4528
4529	302					3431	return $e_script;
4530							}
4531
4532							#
4533							# escape UTF-8 token of script
4534							#
4535							sub UTF2::escape_token {
4536
4537							# \n output here document
4538
4539	131378			131378	0	93563	my $ignore_modules = join('\|', qw(
4540							utf8
4541							bytes
4542							charnames
4543							I18N::Japanese
4544							I18N::Collate
4545							I18N::JExt
4546							File::DosGlob
4547							Wild
4548							Wildcard
4549							Japanese
4550							));
4551
4552							# another member of Tag-team
4553							#
4554							# P.315 "Tag-team" matching with /gc
4555							# in Chapter 7: Perl
4556							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4557
4558	131378	100	100			13268880	if (/\G ( \n ) /oxgc) { # another member (and so on)
		100	100
		100
		100
		100
		100
		100
		100
		100
		100
		50
		100
		50
		100
		100
		50
		50
		50
		50
		50
		50
		100
		100
		50
		50
		50
		50
		100
		100
		50
		100
		50
		100
		100
		100
		100
		50
		100
		100
		100
		50
		100
		100
		100
		50
		50
		50
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		100
		100
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		50
		100
		100
		100
		100
		100
		100
		100
		100
		100
		100
		100
		50
		50
		50
		100
		50
		50
		100
		50
		100
		50
		50
		50
		50
		50
		50
		100
		100
		100
		100
		100
		100
		50
		50
		100
		100
		100
		100
		50
		100
		50
		50
		50
		100
		50
		50
		100
		100
		100
		50
4559	21595					15777	my $heredoc = '';
4560	21595	100				32801	if (scalar(@heredoc_delimiter) >= 1) {
4561	167					179	$slash = 'm//';
4562
4563	167					265	$heredoc = join '', @heredoc;
4564	167					241	@heredoc = ();
4565
4566							# skip here document
4567	167					244	for my $heredoc_delimiter (@heredoc_delimiter) {
4568	175					1033	/\G .*? \n $heredoc_delimiter \n/xmsgc;
4569							}
4570	167					212	@heredoc_delimiter = ();
4571
4572	167					180	$here_script = '';
4573							}
4574	21595					51053	return "\n" . $heredoc;
4575							}
4576
4577							# ignore space, comment
4578	34886					81813	elsif (/\G ((?>\s+)\|\#.*) /oxgc) { return $1; }
4579
4580							# if (, elsif (, unless (, while (, until (, given (, and when (
4581
4582							# given, when
4583
4584							# P.225 The given Statement
4585							# in Chapter 15: Smart Matching and given-when
4586							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
4587
4588							# P.133 The given Statement
4589							# in Chapter 4: Statements and Declarations
4590							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
4591
4592							elsif (/\G ( (?: if \| elsif \| unless \| while \| until \| given \| when ) (?>\s*) \( ) /oxgc) {
4593	2370					2603	$slash = 'm//';
4594	2370					5888	return $1;
4595							}
4596
4597							# scalar variable ($scalar = ...) =~ tr///;
4598							# scalar variable ($scalar = ...) =~ s///;
4599
4600							# state
4601
4602							# P.68 Persistent, Private Variables
4603							# in Chapter 4: Subroutines
4604							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
4605
4606							# P.160 Persistent Lexically Scoped Variables: state
4607							# in Chapter 4: Statements and Declarations
4608							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
4609
4610							# (and so on)
4611
4612							elsif (/\G ( \( (?>\s) (?: local \b \| my \b \| our \b \| state \b )? (?>\s) \$ $qq_scalar ) /oxgc) {
4613	138					242	my $e_string = e_string($1);
4614
4615	138	50				5507	if (/\G ( (?>\s) = $qq_paren \) ) ( (?>\s) (?: =~ \| !~ ) (?>\s*) ) (?= (?: tr \| y ) \b ) /oxgc) {
		50
4616	0					0	$tr_variable = $e_string . e_string($1);
4617	0					0	$bind_operator = $2;
4618	0					0	$slash = 'm//';
4619	0					0	return '';
4620							}
4621							elsif (/\G ( (?>\s) = $qq_paren \) ) ( (?>\s) (?: =~ \| !~ ) (?>\s*) ) (?= s \b ) /oxgc) {
4622	0					0	$sub_variable = $e_string . e_string($1);
4623	0					0	$bind_operator = $2;
4624	0					0	$slash = 'm//';
4625	0					0	return '';
4626							}
4627							else {
4628	138					156	$slash = 'div';
4629	138					575	return $e_string;
4630							}
4631							}
4632
4633							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
4634							elsif (/\G ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \b \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) /oxmsgc) {
4635	4					8	$slash = 'div';
4636	4					9	return q{Eutf2::PREMATCH()};
4637							}
4638
4639							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
4640							elsif (/\G ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \b \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) /oxmsgc) {
4641	28					43	$slash = 'div';
4642	28					70	return q{Eutf2::MATCH()};
4643							}
4644
4645							# $', ${'} --> $', ${'}
4646							elsif (/\G ( \$' \| \$\{'\} ) /oxmsgc) {
4647	1					1	$slash = 'div';
4648	1					4	return $1;
4649							}
4650
4651							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
4652							elsif (/\G ( \$ (?>\s) POSTMATCH \b \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) /oxmsgc) {
4653	3					5	$slash = 'div';
4654	3					9	return q{Eutf2::POSTMATCH()};
4655							}
4656
4657							# scalar variable $scalar =~ tr///;
4658							# scalar variable $scalar =~ s///;
4659							# substr() =~ tr///;
4660							# substr() =~ s///;
4661							elsif (/\G ( \$ $qq_scalar \| $qq_substr ) /oxgc) {
4662	2251					3623	my $scalar = e_string($1);
4663
4664	2251	100				7005	if (/\G ( (?>\s) (?: =~ \| !~ ) (?>\s) ) (?= (?: tr \| y ) \b ) /oxgc) {
		100
4665	9					10	$tr_variable = $scalar;
4666	9					12	$bind_operator = $1;
4667	9					10	$slash = 'm//';
4668	9					20	return '';
4669							}
4670							elsif (/\G ( (?>\s) (?: =~ \| !~ ) (?>\s) ) (?= s \b ) /oxgc) {
4671	95					146	$sub_variable = $scalar;
4672	95					146	$bind_operator = $1;
4673	95					112	$slash = 'm//';
4674	95					255	return '';
4675							}
4676							else {
4677	2147					1911	$slash = 'div';
4678	2147					4809	return $scalar;
4679							}
4680							}
4681
4682							# end of statement
4683							elsif (/\G ( [,;] ) /oxgc) {
4684	8694					8234	$slash = 'm//';
4685
4686							# clear tr/// variable
4687	8694					7044	$tr_variable = '';
4688
4689							# clear s/// variable
4690	8694					6310	$sub_variable = '';
4691
4692	8694					5931	$bind_operator = '';
4693
4694	8694					24570	return $1;
4695							}
4696
4697							# bareword
4698							elsif (/\G ( \{ (?>\s) (?: tr \| index \| rindex \| reverse ) (?>\s) \} ) /oxmsgc) {
4699	0					0	return $1;
4700							}
4701
4702							# $0 --> $0
4703							elsif (/\G ( \$ 0 ) /oxmsgc) {
4704	2					6	$slash = 'div';
4705	2					7	return $1;
4706							}
4707							elsif (/\G ( \$ \{ (?>\s) 0 (?>\s) \} ) /oxmsgc) {
4708	0					0	$slash = 'div';
4709	0					0	return $1;
4710							}
4711
4712							# $$ --> $$
4713							elsif (/\G ( \$ \$ ) (?![\w\{]) /oxmsgc) {
4714	1					2	$slash = 'div';
4715	1					3	return $1;
4716							}
4717
4718							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
4719							# $1, $2, $3 --> $1, $2, $3 otherwise
4720							elsif (/\G \$ ((?>[1-9][0-9]*)) /oxmsgc) {
4721	57					74	$slash = 'div';
4722	57					111	return e_capture($1);
4723							}
4724							elsif (/\G \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} /oxmsgc) {
4725	0					0	$slash = 'div';
4726	0					0	return e_capture($1);
4727							}
4728
4729							# $$foo[ ... ] --> $ $foo->[ ... ]
4730							elsif (/\G \$ ( \$ (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ .+? \] ) /oxmsgc) {
4731	0					0	$slash = 'div';
4732	0					0	return e_capture($1.'->'.$2);
4733							}
4734
4735							# $$foo{ ... } --> $ $foo->{ ... }
4736							elsif (/\G \$ ( \$ (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ .+? \} ) /oxmsgc) {
4737	0					0	$slash = 'div';
4738	0					0	return e_capture($1.'->'.$2);
4739							}
4740
4741							# $$foo
4742							elsif (/\G \$ ( \$ (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) /oxmsgc) {
4743	0					0	$slash = 'div';
4744	0					0	return e_capture($1);
4745							}
4746
4747							# ${ foo }
4748							elsif (/\G \$ (?>\s) \{ ( (?>\s) (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* ) (?>\s*) ) \} /oxmsgc) {
4749	0					0	$slash = 'div';
4750	0					0	return '${' . $1 . '}';
4751							}
4752
4753							# ${ ... }
4754							elsif (/\G \$ (?>\s) \{ (?>\s) ( $qq_brace ) (?>\s*) \} /oxmsgc) {
4755	0					0	$slash = 'div';
4756	0					0	return e_capture($1);
4757							}
4758
4759							# variable or function
4760							# $ @ % & * $ #
4761							elsif (/\G ( (?: [\$\@\%\&\] \| \$\# \| -> \| \b sub \b) (?>\s) (?: split \| chop \| index \| rindex \| lc \| uc \| fc \| chr \| ord \| reverse \| getc \| tr \| y \| q \| qq \| qx \| qw \| m \| s \| qr \| glob \| lstat \| opendir \| stat \| unlink \| chdir ) ) \b /oxmsgc) {
4762	27					34	$slash = 'div';
4763	27					69	return $1;
4764							}
4765							# $ $ $ $ $ $ $ $ $ $ $ $ $ $
4766							# $ @ # \ ' " / ? ( ) [ ] < >
4767							elsif (/\G ( \$[\$\@\#\\\'\"\/\?\[\]\<\>] ) /oxmsgc) {
4768	88					116	$slash = 'div';
4769	88					254	return $1;
4770							}
4771
4772							# while ()
4773							elsif (/\G \b (while (?>\s) $ (?>\s) <[\$]?[A-Za-z_][A-Za-z_0-9]> (?>\s) $) \b /oxgc) {
4774	0					0	return $1;
4775							}
4776
4777							# while () --- glob
4778
4779							# avoid "Error: Runtime exception" of perl version 5.005_03
4780
4781							elsif (/\G \b while (?>\s) $ (?>\s) < ((?:[^\x80-\xFF>\0\a\e\f\n\r\t]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])+?) > (?>\s*) $ \b /oxgc) {
4782	0					0	return 'while ($_ = Eutf2::glob("' . $1 . '"))';
4783							}
4784
4785							# while (glob)
4786							elsif (/\G \b while (?>\s) $ (?>\s) glob (?>\s*) $ /oxgc) {
4787	0					0	return 'while ($_ = Eutf2::glob_)';
4788							}
4789
4790							# while (glob(WILDCARD))
4791							elsif (/\G \b while (?>\s) \( (?>\s) glob \b /oxgc) {
4792	0					0	return 'while ($_ = Eutf2::glob';
4793							}
4794
4795							# doit if, doit unless, doit while, doit until, doit for, doit when
4796	394					612	elsif (/\G \b ( if \| unless \| while \| until \| for \| when ) \b /oxgc) { $slash = 'm//'; return $1; }
	394					1237
4797
4798							# subroutines of package Eutf2
4799	19					24	elsif (/\G \b (CORE:: \| ->(>?\s*) (?: atan2 \| [a-z]{2,})) \b /oxgc) { $slash = 'm//'; return $1; }
	19					53
4800	0					0	elsif (/\G \b Char::eval (?= (?>\s*) \{ ) /oxgc) { $slash = 'm//'; return 'eval'; }
	0					0
4801	13					10	elsif (/\G \b UTF2::eval (?= (?>\s*) \{ ) /oxgc) { $slash = 'm//'; return 'eval'; }
	13					27
4802	0					0	elsif (/\G \b Char::eval \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'eval Char::escape'; }
	0					0
4803	114					121	elsif (/\G \b UTF2::eval \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'eval UTF2::escape'; }
	114					263
4804	2					5	elsif (/\G \b bytes::substr \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'substr'; }
	2					9
4805	2					2	elsif (/\G \b chop \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::chop'; }
	2					6
4806	2					4	elsif (/\G \b bytes::index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'index'; }
	2					7
4807	0					0	elsif (/\G \b Char::index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Char::index'; }
	0					0
4808	2					3	elsif (/\G \b UTF2::index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'UTF2::index'; }
	2					4
4809	2					3	elsif (/\G \b index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::index'; }
	2					5
4810	2					3	elsif (/\G \b bytes::rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'rindex'; }
	2					5
4811	0					0	elsif (/\G \b Char::rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Char::rindex'; }
	0					0
4812	2					3	elsif (/\G \b UTF2::rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'UTF2::rindex'; }
	2					5
4813	2					3	elsif (/\G \b rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::rindex'; }
	2					4
4814	1					1	elsif (/\G \b lc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::lc'; }
	1					3
4815	0					0	elsif (/\G \b lcfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::lcfirst'; }
	0					0
4816	0					0	elsif (/\G \b uc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::uc'; }
	0					0
4817	0					0	elsif (/\G \b ucfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::ucfirst'; }
	0					0
4818	7					8	elsif (/\G \b fc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::fc'; }
	7					22
4819
4820							# "-s '' ..." means file test "-s 'filename' ..." (not means "- s/// ...")
4821	0					0	elsif (/\G -s (?>\s*) (\") ((?:$qq_char)+?) (\") /oxgc) { $slash = 'm//'; return '-s ' . e_qq('', $1,$3,$2); }
	0					0
4822	0					0	elsif (/\G -s (?>\s+) qq (?>\s*) (\#) ((?:$qq_char)+?) (\#) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0					0
4823	0					0	elsif (/\G -s (?>\s+) qq (?>\s*) ($) ((?:$qq_paren)+?) ($) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0					0
4824	0					0	elsif (/\G -s (?>\s+) qq (?>\s*) (\{) ((?:$qq_brace)+?) (\}) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0					0
4825	0					0	elsif (/\G -s (?>\s+) qq (?>\s*) (\[) ((?:$qq_bracket)+?) (\]) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0					0
4826	0					0	elsif (/\G -s (?>\s+) qq (?>\s*) (\<) ((?:$qq_angle)+?) (\>) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0					0
4827	0					0	elsif (/\G -s (?>\s+) qq (?>\s*) (\S) ((?:$qq_char)+?) (\1) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0					0
4828
4829	0					0	elsif (/\G -s (?>\s*) (\') ((?:\\\'\|\\\\\|$q_char)+?) (\') /oxgc) { $slash = 'm//'; return '-s ' . e_q ('', $1,$3,$2); }
	0					0
4830	0					0	elsif (/\G -s (?>\s+) q (?>\s*) (\#) ((?:\\\#\|\\\\\|$q_char)+?) (\#) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0					0
4831	0					0	elsif (/\G -s (?>\s+) q (?>\s*) ($) ((?:\\$\|\\\\\|$q_paren)+?) (\)) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0					0
4832	0					0	elsif (/\G -s (?>\s+) q (?>\s*) (\{) ((?:\\\}\|\\\\\|$q_brace)+?) (\}) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0					0
4833	0					0	elsif (/\G -s (?>\s+) q (?>\s*) (\[) ((?:\\\]\|\\\\\|$q_bracket)+?) (\]) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0					0
4834	0					0	elsif (/\G -s (?>\s+) q (?>\s*) (\<) ((?:\\\>\|\\\\\|$q_angle)+?) (\>) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0					0
4835	0					0	elsif (/\G -s (?>\s+) q (?>\s*) (\S) ((?:\\\1\|\\\\\|$q_char)+?) (\1) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0					0
4836
4837							elsif (/\G -s (?>\s) (\$ (?> \w+ (?: ::\w+) ) (?: (?: ->)? (?: [\$\@\%\&\]\ \| \$\#\* \| $ (?:$qq_paren)? $ \| [\@\%\]? \{ (?:$qq_brace)+? \} \| [\@\%]? \[ (?:$qq_bracket)+? \] ) )*) /oxgc)
4838	0					0	{ $slash = 'm//'; return "-s $1"; }
	0					0
4839	0					0	elsif (/\G -s (?>\s) $ ((?:$qq_paren)?) $ /oxgc) { $slash = 'm//'; return "-s ($1)"; }
	0					0
4840	0					0	elsif (/\G -s (?= (?>\s+) [a-z]+) /oxgc) { $slash = 'm//'; return '-s'; }
	0					0
4841	0					0	elsif (/\G -s (?>\s+) ((?>\w+)) /oxgc) { $slash = 'm//'; return "-s $1"; }
	0					0
4842
4843	2					4	elsif (/\G \b bytes::length (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'length'; }
	2					8
4844	2					3	elsif (/\G \b bytes::chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'chr'; }
	2					6
4845	36					38	elsif (/\G \b chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::chr'; }
	36					90
4846	2					2	elsif (/\G \b bytes::ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'div'; return 'ord'; }
	2					8
4847	2					3	elsif (/\G \b ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'div'; return $function_ord; }
	2					6
4848	0					0	elsif (/\G \b glob (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::glob'; }
	0					0
4849	0					0	elsif (/\G \b lc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::lc_'; }
	0					0
4850	0					0	elsif (/\G \b lcfirst \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::lcfirst_'; }
	0					0
4851	0					0	elsif (/\G \b uc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::uc_'; }
	0					0
4852	0					0	elsif (/\G \b ucfirst \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::ucfirst_'; }
	0					0
4853	0					0	elsif (/\G \b fc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::fc_'; }
	0					0
4854	0					0	elsif (/\G -s \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return '-s '; }
	0					0
4855
4856	0					0	elsif (/\G \b bytes::length \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'length'; }
	0					0
4857	0					0	elsif (/\G \b bytes::chr \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'chr'; }
	0					0
4858	0					0	elsif (/\G \b chr \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::chr_'; }
	0					0
4859	0					0	elsif (/\G \b bytes::ord \b (?! (?>\s*) => ) /oxgc) { $slash = 'div'; return 'ord'; }
	0					0
4860	2					4	elsif (/\G \b ord \b (?! (?>\s*) => ) /oxgc) { $slash = 'div'; return $function_ord_; }
	2					6
4861	0					0	elsif (/\G \b glob \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::glob_'; }
	0					0
4862	4					5	elsif (/\G \b reverse \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return $function_reverse; }
	4					12
4863	8					12	elsif (/\G \b getc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return $function_getc; }
	8					23
4864							# split
4865							elsif (/\G \b (split) \b (?! (?>\s*) => ) /oxgc) {
4866	120					188	$slash = 'm//';
4867
4868	120					141	my $e = '';
4869	120					401	while (/\G ( (?>\s+) \| \( \| \#.* ) /oxgc) {
4870	117					391	$e .= $1;
4871							}
4872
4873							# end of split
4874	120	100				13242	if (/\G (?= [,;\)\}\]] ) /oxgc) { return 'Eutf2::split' . $e; }
	3	100				16
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		50
		100
		50
		100
		50
		50
		50
4875
4876							# split scalar value
4877	1					3	elsif (/\G ( [\$\@\&\*] $qq_scalar ) /oxgc) { return 'Eutf2::split' . $e . e_string($1); }
4878
4879							# split literal space
4880	0					0	elsif (/\G \b qq (\#) [ ] (\#) /oxgc) { return 'Eutf2::split' . $e . qq {qq$1 $2}; }
4881	0					0	elsif (/\G \b qq ((?>\s*)) ($) [ ] ($) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4882	0					0	elsif (/\G \b qq ((?>\s*)) (\{) [ ] (\}) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4883	0					0	elsif (/\G \b qq ((?>\s*)) (\[) [ ] (\]) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4884	0					0	elsif (/\G \b qq ((?>\s*)) (\<) [ ] (\>) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4885	0					0	elsif (/\G \b qq ((?>\s*)) (\S) [ ] (\2) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4886	0					0	elsif (/\G \b q (\#) [ ] (\#) /oxgc) { return 'Eutf2::split' . $e . qq {q$1 $2}; }
4887	0					0	elsif (/\G \b q ((?>\s*)) ($) [ ] ($) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4888	0					0	elsif (/\G \b q ((?>\s*)) (\{) [ ] (\}) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4889	0					0	elsif (/\G \b q ((?>\s*)) (\[) [ ] (\]) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4890	0					0	elsif (/\G \b q ((?>\s*)) (\<) [ ] (\>) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4891	0					0	elsif (/\G \b q ((?>\s*)) (\S) [ ] (\2) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4892	13					53	elsif (/\G ' [ ] ' /oxgc) { return 'Eutf2::split' . $e . qq {' '}; }
4893	2					12	elsif (/\G " [ ] " /oxgc) { return 'Eutf2::split' . $e . qq {" "}; }
4894
4895							# split qq//
4896							elsif (/\G \b (qq) \b /oxgc) {
4897	0	0				0	if (/\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq# # --> qr # #
	0					0
4898							else {
4899	0					0	while (not /\G \z/oxgc) {
4900	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
		0
4901	0					0	elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq ( ) --> qr ( )
4902	0					0	elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq { } --> qr { }
4903	0					0	elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq [ ] --> qr [ ]
4904	0					0	elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq < > --> qr < >
4905	0					0	elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) /oxgc) { return e_split($e.'qr','{','}',$2,''); } # qq \| \| --> qr { }
4906	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq * --> qr * *
4907							}
4908	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4909							}
4910							}
4911
4912							# split qr//
4913							elsif (/\G \b (qr) \b /oxgc) {
4914	12	50				725	if (/\G (\#) ((?:$qq_char)?) (\#) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1,$3,$2,$4); } # qr# #
	0					0
4915							else {
4916	12					53	while (not /\G \z/oxgc) {
4917	12	50				5575	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		50
		50
		50
		50
		50
		50
4918	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr ( )
4919	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr { }
4920	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr [ ]
4921	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr < >
4922	0					0	elsif (/\G (\') ((?:$qq_char)?) (\') ([imosxpadlunbB]) /oxgc) { return e_split_q($e.'qr',$1, $3, $2,$4); } # qr ' '
4923	0					0	elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([imosxpadlunbB]*) /oxgc) { return e_split ($e.'qr','{','}',$2,$4); } # qr \| \| --> qr { }
4924	12					55	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr * *
4925							}
4926	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4927							}
4928							}
4929
4930							# split q//
4931							elsif (/\G \b (q) \b /oxgc) {
4932	0	0				0	if (/\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q# # --> qr # #
	0					0
4933							else {
4934	0					0	while (not /\G \z/oxgc) {
4935	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
		0
4936	0					0	elsif (/\G ($) ((?:\\\\\|\\$\|\\$\|$q_paren)*?) ($) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q ( ) --> qr ( )
4937	0					0	elsif (/\G (\{) ((?:\\\\\|\\\}\|\\\{\|$q_brace)*?) (\}) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q { } --> qr { }
4938	0					0	elsif (/\G (\[) ((?:\\\\\|\\\]\|\\\[\|$q_bracket)*?) (\]) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q [ ] --> qr [ ]
4939	0					0	elsif (/\G (\<) ((?:\\\\\|\\\>\|\\\<\|$q_angle)*?) (\>) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q < > --> qr < >
4940	0					0	elsif (/\G ([\-:?\\^\|]) ((?:$q_char)?) (\1) /oxgc) { return e_split_q($e.'qr','{','}',$2,''); } # q \| \| --> qr { }
4941	0					0	elsif (/\G (\S) ((?:\\\\\|\\\1\| $q_char)?) (\1) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q * --> qr * *
4942							}
4943	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4944							}
4945							}
4946
4947							# split m//
4948							elsif (/\G \b (m) \b /oxgc) {
4949	24	50				861	if (/\G (\#) ((?:$qq_char)?) (\#) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1,$3,$2,$4); } # m# # --> qr # #
	0					0
4950							else {
4951	24					87	while (not /\G \z/oxgc) {
4952	24	50				6774	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		50
		50
		50
		50
		50
		50
4953	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m ( ) --> qr ( )
4954	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m { } --> qr { }
4955	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m [ ] --> qr [ ]
4956	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m < > --> qr < >
4957	0					0	elsif (/\G (\') ((?:$qq_char)?) (\') ([cgimosxpadlunbB]) /oxgc) { return e_split_q($e.'qr',$1, $3, $2,$4); } # m ' ' --> qr ' '
4958	0					0	elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]*) /oxgc) { return e_split ($e.'qr','{','}',$2,$4); } # m \| \| --> qr { }
4959	24					145	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m * * --> qr * *
4960							}
4961	0					0	die __FILE__, ": Search pattern not terminated\n";
4962							}
4963							}
4964
4965							# split ''
4966							elsif (/\G (\') /oxgc) {
4967	0					0	my $q_string = '';
4968	0					0	while (not /\G \z/oxgc) {
4969	0	0				0	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0				0
		0
		0
4970	0					0	elsif (/\G (\\\') /oxgc) { $q_string .= $1; } # splitqr'' --> split qr''
4971	0					0	elsif (/\G \' /oxgc) { return e_split_q($e.q{ qr},"'","'",$q_string,''); } # ' ' --> qr ' '
4972	0					0	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
4973							}
4974	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4975							}
4976
4977							# split ""
4978							elsif (/\G (\") /oxgc) {
4979	0					0	my $qq_string = '';
4980	0					0	while (not /\G \z/oxgc) {
4981	0	0				0	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0				0
		0
		0
4982	0					0	elsif (/\G (\\\") /oxgc) { $qq_string .= $1; } # splitqr"" --> split qr""
4983	0					0	elsif (/\G \" /oxgc) { return e_split($e.q{ qr},'"','"',$qq_string,''); } # " " --> qr " "
4984	0					0	elsif (/\G ($q_char) /oxgc) { $qq_string .= $1; }
4985							}
4986	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4987							}
4988
4989							# split //
4990							elsif (/\G (\/) /oxgc) {
4991	65					98	my $regexp = '';
4992	65					186	while (not /\G \z/oxgc) {
4993	434	50				2562	if (/\G (\\\\) /oxgc) { $regexp .= $1; }
	0	50				0
		100
		50
4994	0					0	elsif (/\G (\\\/) /oxgc) { $regexp .= $1; } # splitqr// --> split qr//
4995	65					231	elsif (/\G \/ ([cgimosxpadlunbB]*) /oxgc) { return e_split($e.q{ qr}, '/','/',$regexp,$1); } # / / --> qr / /
4996	369					690	elsif (/\G ($q_char) /oxgc) { $regexp .= $1; }
4997							}
4998	0					0	die __FILE__, ": Search pattern not terminated\n";
4999							}
5000							}
5001
5002							# tr/// or y///
5003
5004							# about [cdsrbB]* (/B modifier)
5005							#
5006							# P.559 appendix C
5007							# of ISBN 4-89052-384-7 Programming perl
5008							# (Japanese title is: Perl puroguramingu)
5009
5010							elsif (/\G \b ( tr \| y ) \b /oxgc) {
5011	11					15	my $ope = $1;
5012
5013							# $1 $2 $3 $4 $5 $6
5014	11	50				254	if (/\G (\#) ((?:$qq_char)?) (\#) ((?:$qq_char)?) (\#) ([cdsrbB]*) /oxgc) { # tr# # #
5015	0					0	my @tr = ($tr_variable,$2);
5016	0					0	return e_tr(@tr,'',$4,$6);
5017							}
5018							else {
5019	11					11	my $e = '';
5020	11					25	while (not /\G \z/oxgc) {
5021	11	50				1266	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		50
		50
		50
		50
5022							elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) {
5023	0					0	my @tr = ($tr_variable,$2);
5024	0					0	while (not /\G \z/oxgc) {
5025	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
5026	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) ( )
5027	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) { }
5028	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) [ ]
5029	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) < >
5030	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) * *
5031							}
5032	0					0	die __FILE__, ": Transliteration replacement not terminated\n";
5033							}
5034							elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) {
5035	0					0	my @tr = ($tr_variable,$2);
5036	0					0	while (not /\G \z/oxgc) {
5037	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
5038	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } ( )
5039	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } { }
5040	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } [ ]
5041	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } < >
5042	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } * *
5043							}
5044	0					0	die __FILE__, ": Transliteration replacement not terminated\n";
5045							}
5046							elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) {
5047	0					0	my @tr = ($tr_variable,$2);
5048	0					0	while (not /\G \z/oxgc) {
5049	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
5050	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] ( )
5051	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] { }
5052	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] [ ]
5053	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] < >
5054	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] * *
5055							}
5056	0					0	die __FILE__, ": Transliteration replacement not terminated\n";
5057							}
5058							elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) {
5059	0					0	my @tr = ($tr_variable,$2);
5060	0					0	while (not /\G \z/oxgc) {
5061	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
5062	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > ( )
5063	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > { }
5064	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > [ ]
5065	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > < >
5066	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > * *
5067							}
5068	0					0	die __FILE__, ": Transliteration replacement not terminated\n";
5069							}
5070							# $1 $2 $3 $4 $5 $6
5071							elsif (/\G (\S) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { # tr * *
5072	11					30	my @tr = ($tr_variable,$2);
5073	11					28	return e_tr(@tr,'',$4,$6);
5074							}
5075							}
5076	0					0	die __FILE__, ": Transliteration pattern not terminated\n";
5077							}
5078							}
5079
5080							# qq//
5081							elsif (/\G \b (qq) \b /oxgc) {
5082	3772					5686	my $ope = $1;
5083
5084							# if (/\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { return e_qq($ope,$1,$3,$2); } # qq# #
5085	3772	100				5334	if (/\G (\#) /oxgc) { # qq# #
5086	40					38	my $qq_string = '';
5087	40					71	while (not /\G \z/oxgc) {
5088	1948	100				5230	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	80	50				184
		100
		50
5089	0					0	elsif (/\G (\\\#) /oxgc) { $qq_string .= $1; }
5090	40					63	elsif (/\G (\#) /oxgc) { return e_qq($ope,'#','#',$qq_string); }
5091	1828					2671	elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
5092							}
5093	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5094							}
5095
5096							else {
5097	3732					3498	my $e = '';
5098	3732					7204	while (not /\G \z/oxgc) {
5099	3732	50				12223	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		100
		50
		100
		50
5100
5101							# elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq ( )
5102							elsif (/\G (\() /oxgc) { # qq ( )
5103	0					0	my $qq_string = '';
5104	0					0	local $nest = 1;
5105	0					0	while (not /\G \z/oxgc) {
5106	0	0				0	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0				0
		0
		0
		0
5107	0					0	elsif (/\G (\\\)) /oxgc) { $qq_string .= $1; }
5108	0					0	elsif (/\G (\() /oxgc) { $qq_string .= $1; $nest++; }
	0					0
5109							elsif (/\G (\)) /oxgc) {
5110	0	0				0	if (--$nest == 0) { return $e . e_qq($ope,'(',')',$qq_string); }
	0					0
5111	0					0	else { $qq_string .= $1; }
5112							}
5113	0					0	elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
5114							}
5115	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5116							}
5117
5118							# elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq { }
5119							elsif (/\G (\{) /oxgc) { # qq { }
5120	3674					3086	my $qq_string = '';
5121	3674					4026	local $nest = 1;
5122	3674					6520	while (not /\G \z/oxgc) {
5123	154373	100				484026	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	792	50				1302
		100
		100
		50
5124	0					0	elsif (/\G (\\\}) /oxgc) { $qq_string .= $1; }
5125	1334					1391	elsif (/\G (\{) /oxgc) { $qq_string .= $1; $nest++; }
	1334					1957
5126							elsif (/\G (\}) /oxgc) {
5127	5008	100				5900	if (--$nest == 0) { return $e . e_qq($ope,'{','}',$qq_string); }
	3674					6410
5128	1334					2303	else { $qq_string .= $1; }
5129							}
5130	147239					229113	elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
5131							}
5132	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5133							}
5134
5135							# elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq [ ]
5136							elsif (/\G (\[) /oxgc) { # qq [ ]
5137	0					0	my $qq_string = '';
5138	0					0	local $nest = 1;
5139	0					0	while (not /\G \z/oxgc) {
5140	0	0				0	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0				0
		0
		0
		0
5141	0					0	elsif (/\G (\\\]) /oxgc) { $qq_string .= $1; }
5142	0					0	elsif (/\G (\[) /oxgc) { $qq_string .= $1; $nest++; }
	0					0
5143							elsif (/\G (\]) /oxgc) {
5144	0	0				0	if (--$nest == 0) { return $e . e_qq($ope,'[',']',$qq_string); }
	0					0
5145	0					0	else { $qq_string .= $1; }
5146							}
5147	0					0	elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
5148							}
5149	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5150							}
5151
5152							# elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq < >
5153							elsif (/\G (\<) /oxgc) { # qq < >
5154	38					41	my $qq_string = '';
5155	38					55	local $nest = 1;
5156	38					111	while (not /\G \z/oxgc) {
5157	1418	100				5475	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	22	50				47
		50
		100
		50
5158	0					0	elsif (/\G (\\\>) /oxgc) { $qq_string .= $1; }
5159	0					0	elsif (/\G (\<) /oxgc) { $qq_string .= $1; $nest++; }
	0					0
5160							elsif (/\G (\>) /oxgc) {
5161	38	50				86	if (--$nest == 0) { return $e . e_qq($ope,'<','>',$qq_string); }
	38					85
5162	0					0	else { $qq_string .= $1; }
5163							}
5164	1358					2202	elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
5165							}
5166	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5167							}
5168
5169							# elsif (/\G (\S) ((?:$qq_char)?) (\1) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq *
5170							elsif (/\G (\S) /oxgc) { # qq * *
5171	20					20	my $delimiter = $1;
5172	20					20	my $qq_string = '';
5173	20					30	while (not /\G \z/oxgc) {
5174	840	50				2706	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	50				0
		100
		50
5175	0					0	elsif (/\G (\\\Q$delimiter\E) /oxgc) { $qq_string .= $1; }
5176	20					32	elsif (/\G (\Q$delimiter\E) /oxgc) { return $e . e_qq($ope,$delimiter,$delimiter,$qq_string); }
5177	820					1297	elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
5178							}
5179	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5180							}
5181							}
5182	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5183							}
5184							}
5185
5186							# qr//
5187							elsif (/\G \b (qr) \b /oxgc) {
5188	36					55	my $ope = $1;
5189	36	50				492	if (/\G (\#) ((?:$qq_char)?) (\#) ([imosxpadlunbB]) /oxgc) { # qr# # #
5190	0					0	return e_qr($ope,$1,$3,$2,$4);
5191							}
5192							else {
5193	36					50	my $e = '';
5194	36					75	while (not /\G \z/oxgc) {
5195	36	50				3400	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		100
		50
		50
		100
		50
		50
5196	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr ( )
5197	1					8	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr { }
5198	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr [ ]
5199	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr < >
5200	2					9	elsif (/\G (\') ((?:$qq_char)?) (\') ([imosxpadlunbB]) /oxgc) { return $e . e_qr_q($ope,$1, $3, $2,$4); } # qr ' '
5201	0					0	elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([imosxpadlunbB]*) /oxgc) { return $e . e_qr ($ope,'{','}',$2,$4); } # qr \| \| --> qr { }
5202	33					69	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr * *
5203							}
5204	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5205							}
5206							}
5207
5208							# qw//
5209							elsif (/\G \b (qw) \b /oxgc) {
5210	34					67	my $ope = $1;
5211	34	50				118	if (/\G (\#) (.*?) (\#) /oxmsgc) { # qw# #
5212	0					0	return e_qw($ope,$1,$3,$2);
5213							}
5214							else {
5215	34					45	my $e = '';
5216	34					95	while (not /\G \z/oxgc) {
5217	34	50				180	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		0
		0
		0
		0
		0
		0
		0
		0
		0
5218
5219	34					92	elsif (/\G ($) ([^(]*?) ($) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw ( )
5220	0					0	elsif (/\G ($) ((?:$q_paren)*?) ($) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw ( )
5221
5222	0					0	elsif (/\G (\{) ([^{]*?) (\}) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw { }
5223	0					0	elsif (/\G (\{) ((?:$q_brace)*?) (\}) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw { }
5224
5225	0					0	elsif (/\G (\[) ([^[]*?) (\]) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw [ ]
5226	0					0	elsif (/\G (\[) ((?:$q_bracket)*?) (\]) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw [ ]
5227
5228	0					0	elsif (/\G (\<) ([^<]*?) (\>) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw < >
5229	0					0	elsif (/\G (\<) ((?:$q_angle)*?) (\>) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw < >
5230
5231	0					0	elsif (/\G ([\x21-\x3F]) (.?) (\1) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw *
5232	0					0	elsif (/\G (\S) ((?:$q_char)?) (\1) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw *
5233							}
5234	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5235							}
5236							}
5237
5238							# qx//
5239							elsif (/\G \b (qx) \b /oxgc) {
5240	2					4	my $ope = $1;
5241	2	50				61	if (/\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { # qx# #
5242	0					0	return e_qq($ope,$1,$3,$2);
5243							}
5244							else {
5245	2					4	my $e = '';
5246	2					6	while (not /\G \z/oxgc) {
5247	2	50				201	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		50
		0
		0
		0
		0
5248	0					0	elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx ( )
5249	2					5	elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx { }
5250	0					0	elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx [ ]
5251	0					0	elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx < >
5252	0					0	elsif (/\G (\') ((?:$qq_char)*?) (\') /oxgc) { return $e . e_q ($ope,$1,$3,$2); } # qx ' '
5253	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx *
5254							}
5255	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5256							}
5257							}
5258
5259							# q//
5260							elsif (/\G \b (q) \b /oxgc) {
5261	362					799	my $ope = $1;
5262
5263							# if (/\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { return e_q($ope,$1,$3,$2); } # q# #
5264
5265							# avoid "Error: Runtime exception" of perl version 5.005_03
5266							# (and so on)
5267
5268	362	50				967	if (/\G (\#) /oxgc) { # q# #
5269	0					0	my $q_string = '';
5270	0					0	while (not /\G \z/oxgc) {
5271	0	0				0	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0				0
		0
		0
5272	0					0	elsif (/\G (\\\#) /oxgc) { $q_string .= $1; }
5273	0					0	elsif (/\G (\#) /oxgc) { return e_q($ope,'#','#',$q_string); }
5274	0					0	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5275							}
5276	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5277							}
5278
5279							else {
5280	362					567	my $e = '';
5281	362					1157	while (not /\G \z/oxgc) {
5282	362	50				2037	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		100
		50
		100
		50
5283
5284							# elsif (/\G ($) ((?:\\$\|\\\\\|$q_paren)*?) (\)) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q ( )
5285							elsif (/\G (\() /oxgc) { # q ( )
5286	0					0	my $q_string = '';
5287	0					0	local $nest = 1;
5288	0					0	while (not /\G \z/oxgc) {
5289	0	0				0	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0				0
		0
		0
		0
		0
5290	0					0	elsif (/\G (\\\)) /oxgc) { $q_string .= $1; }
5291	0					0	elsif (/\G (\\\() /oxgc) { $q_string .= $1; }
5292	0					0	elsif (/\G (\() /oxgc) { $q_string .= $1; $nest++; }
	0					0
5293							elsif (/\G (\)) /oxgc) {
5294	0	0				0	if (--$nest == 0) { return $e . e_q($ope,'(',')',$q_string); }
	0					0
5295	0					0	else { $q_string .= $1; }
5296							}
5297	0					0	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5298							}
5299	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5300							}
5301
5302							# elsif (/\G (\{) ((?:\\\}\|\\\\\|$q_brace)*?) (\}) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q { }
5303							elsif (/\G (\{) /oxgc) { # q { }
5304	356					519	my $q_string = '';
5305	356					623	local $nest = 1;
5306	356					993	while (not /\G \z/oxgc) {
5307	4999	50				29860	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	50				0
		50
		100
		100
		50
5308	0					0	elsif (/\G (\\\}) /oxgc) { $q_string .= $1; }
5309	0					0	elsif (/\G (\\\{) /oxgc) { $q_string .= $1; }
5310	114					136	elsif (/\G (\{) /oxgc) { $q_string .= $1; $nest++; }
	114					188
5311							elsif (/\G (\}) /oxgc) {
5312	470	100				932	if (--$nest == 0) { return $e . e_q($ope,'{','}',$q_string); }
	356					1111
5313	114					216	else { $q_string .= $1; }
5314							}
5315	4415					7913	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5316							}
5317	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5318							}
5319
5320							# elsif (/\G (\[) ((?:\\\]\|\\\\\|$q_bracket)*?) (\]) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q [ ]
5321							elsif (/\G (\[) /oxgc) { # q [ ]
5322	0					0	my $q_string = '';
5323	0					0	local $nest = 1;
5324	0					0	while (not /\G \z/oxgc) {
5325	0	0				0	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0				0
		0
		0
		0
		0
5326	0					0	elsif (/\G (\\\]) /oxgc) { $q_string .= $1; }
5327	0					0	elsif (/\G (\\\[) /oxgc) { $q_string .= $1; }
5328	0					0	elsif (/\G (\[) /oxgc) { $q_string .= $1; $nest++; }
	0					0
5329							elsif (/\G (\]) /oxgc) {
5330	0	0				0	if (--$nest == 0) { return $e . e_q($ope,'[',']',$q_string); }
	0					0
5331	0					0	else { $q_string .= $1; }
5332							}
5333	0					0	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5334							}
5335	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5336							}
5337
5338							# elsif (/\G (\<) ((?:\\\>\|\\\\\|$q_angle)*?) (\>) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q < >
5339							elsif (/\G (\<) /oxgc) { # q < >
5340	5					36	my $q_string = '';
5341	5					21	local $nest = 1;
5342	5					22	while (not /\G \z/oxgc) {
5343	82	50				661	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	50				0
		50
		50
		100
		50
5344	0					0	elsif (/\G (\\\>) /oxgc) { $q_string .= $1; }
5345	0					0	elsif (/\G (\\\<) /oxgc) { $q_string .= $1; }
5346	0					0	elsif (/\G (\<) /oxgc) { $q_string .= $1; $nest++; }
	0					0
5347							elsif (/\G (\>) /oxgc) {
5348	5	50				15	if (--$nest == 0) { return $e . e_q($ope,'<','>',$q_string); }
	5					16
5349	0					0	else { $q_string .= $1; }
5350							}
5351	77					152	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5352							}
5353	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5354							}
5355
5356							# elsif (/\G (\S) ((?:\\\1\|\\\\\|$q_char)?) (\1) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q *
5357							elsif (/\G (\S) /oxgc) { # q * *
5358	1					1	my $delimiter = $1;
5359	1					1	my $q_string = '';
5360	1					3	while (not /\G \z/oxgc) {
5361	14	50				95	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	50				0
		100
		50
5362	0					0	elsif (/\G (\\\Q$delimiter\E) /oxgc) { $q_string .= $1; }
5363	1					2	elsif (/\G (\Q$delimiter\E) /oxgc) { return $e . e_q($ope,$delimiter,$delimiter,$q_string); }
5364	13					24	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5365							}
5366	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5367							}
5368							}
5369	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5370							}
5371							}
5372
5373							# m//
5374							elsif (/\G \b (m) \b /oxgc) {
5375	269					416	my $ope = $1;
5376	269	50				2972	if (/\G (\#) ((?:$qq_char)?) (\#) ([cgimosxpadlunbB]) /oxgc) { # m# #
5377	0					0	return e_qr($ope,$1,$3,$2,$4);
5378							}
5379							else {
5380	269					275	my $e = '';
5381	269					584	while (not /\G \z/oxgc) {
5382	269	50				23981	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		50
		50
		50
		100
		100
		50
		50
5383	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m ( )
5384	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m { }
5385	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m [ ]
5386	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m < >
5387	18					41	elsif (/\G (\?) ((?:$qq_char)?) (\?) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m ? ?
5388	13					41	elsif (/\G (\') ((?:$qq_char)?) (\') ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr_q($ope,$1, $3, $2,$4); } # m ' '
5389	0					0	elsif (/\G ([\-:\\^\|]) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]*) /oxgc) { return $e . e_qr ($ope,'{','}',$2,$4); } # m \| \| --> m { }
5390	238					577	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m * *
5391							}
5392	0					0	die __FILE__, ": Search pattern not terminated\n";
5393							}
5394							}
5395
5396							# s///
5397
5398							# about [cegimosxpradlunbB]* (/cg modifier)
5399							#
5400							# P.67 Pattern-Matching Operators
5401							# of ISBN 0-596-00241-6 Perl in a Nutshell, Second Edition.
5402
5403							elsif (/\G \b (s) \b /oxgc) {
5404	132					240	my $ope = $1;
5405
5406							# $1 $2 $3 $4 $5 $6
5407	132	100				6138	if (/\G (\#) ((?:$qq_char)?) (\#) ((?:$qq_char)?) (\#) ([cegimosxpradlunbB]*) /oxgc) { # s# # #
5408	1					4	return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5409							}
5410							else {
5411	131					199	my $e = '';
5412	131					387	while (not /\G \z/oxgc) {
5413	131	50				39531	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	50				0
		50
		50
		50
		100
		100
		50
		50
5414							elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) {
5415	0					0	my @s = ($1,$2,$3);
5416	0					0	while (not /\G \z/oxgc) {
5417	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
		0
		0
		0
		0
5418							# $1 $2 $3 $4
5419	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5420	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5421	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5422	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5423	0					0	elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5424	0					0	elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5425	0					0	elsif (/\G (\:) ((?:$qq_char)?) (\:) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5426	0					0	elsif (/\G (\@) ((?:$qq_char)?) (\@) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5427	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5428							}
5429	0					0	die __FILE__, ": Substitution replacement not terminated\n";
5430							}
5431							elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) {
5432	0					0	my @s = ($1,$2,$3);
5433	0					0	while (not /\G \z/oxgc) {
5434	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
		0
		0
		0
		0
5435							# $1 $2 $3 $4
5436	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5437	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5438	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5439	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5440	0					0	elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5441	0					0	elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5442	0					0	elsif (/\G (\:) ((?:$qq_char)?) (\:) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5443	0					0	elsif (/\G (\@) ((?:$qq_char)?) (\@) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5444	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5445							}
5446	0					0	die __FILE__, ": Substitution replacement not terminated\n";
5447							}
5448							elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) {
5449	0					0	my @s = ($1,$2,$3);
5450	0					0	while (not /\G \z/oxgc) {
5451	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
		0
		0
5452							# $1 $2 $3 $4
5453	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5454	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5455	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5456	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5457	0					0	elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5458	0					0	elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5459	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5460							}
5461	0					0	die __FILE__, ": Substitution replacement not terminated\n";
5462							}
5463							elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) {
5464	0					0	my @s = ($1,$2,$3);
5465	0					0	while (not /\G \z/oxgc) {
5466	0	0				0	if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
		0
		0
		0
		0
5467							# $1 $2 $3 $4
5468	0					0	elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5469	0					0	elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5470	0					0	elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5471	0					0	elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5472	0					0	elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5473	0					0	elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5474	0					0	elsif (/\G (\:) ((?:$qq_char)?) (\:) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5475	0					0	elsif (/\G (\@) ((?:$qq_char)?) (\@) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5476	0					0	elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5477							}
5478	0					0	die __FILE__, ": Substitution replacement not terminated\n";
5479							}
5480							# $1 $2 $3 $4 $5 $6
5481							elsif (/\G (\') ((?:$qq_char)?) (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]*) /oxgc) {
5482	22					54	return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5483							}
5484							# $1 $2 $3 $4 $5 $6
5485							elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) {
5486	2					9	return e_sub($sub_variable,'{',$2,'}','{',$4,'}',$6); # s \| \| \| --> s { } { }
5487							}
5488							# $1 $2 $3 $4 $5 $6
5489							elsif (/\G (\$) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]*) /oxgc) {
5490	0					0	return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5491							}
5492							# $1 $2 $3 $4 $5 $6
5493							elsif (/\G (\S) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]*) /oxgc) {
5494	107					394	return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5495							}
5496							}
5497	0					0	die __FILE__, ": Substitution pattern not terminated\n";
5498							}
5499							}
5500
5501							# require ignore module
5502	0					0	elsif (/\G \b require ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [#\n]) /oxmsgc) { return "# require$1$2"; }
5503	0					0	elsif (/\G \b require ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "# require$1\n$2"; }
5504	0					0	elsif (/\G \b require ((?>\s+) (?:$ignore_modules)) \b /oxmsgc) { return "# require$1"; }
5505
5506							# use strict; --> use strict; no strict qw(refs);
5507	42					580	elsif (/\G \b use ((?>\s+) strict .? ;) ([ \t] [#\n]) /oxmsgc) { return "use$1 no strict qw(refs);$2"; }
5508	0					0	elsif (/\G \b use ((?>\s+) strict .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "use$1 no strict qw(refs);\n$2"; }
5509	0					0	elsif (/\G \b use ((?>\s+) strict) \b /oxmsgc) { return "use$1; no strict qw(refs)"; }
5510
5511							# use 5.12.0; --> use 5.12.0; no strict qw(refs);
5512							elsif (/\G \b use (?>\s+) ((?>([1-9][0-9_])(?:\.([0-9_]+)))) (?>\s*) ; /oxmsgc) {
5513	3	50	33			39	if (($2 >= 6) or (($2 == 5) and ($3 ge '012'))) {
			33
5514	0					0	return "use $1; no strict qw(refs);";
5515							}
5516							else {
5517	3					15	return "use $1;";
5518							}
5519							}
5520							elsif (/\G \b use (?>\s+) ((?>v([0-9][0-9_])(?:\.([0-9_]+)))) (?>\s*) ; /oxmsgc) {
5521	0	0	0			0	if (($2 >= 6) or (($2 == 5) and ($3 >= 12))) {
			0
5522	0					0	return "use $1; no strict qw(refs);";
5523							}
5524							else {
5525	0					0	return "use $1;";
5526							}
5527							}
5528
5529							# ignore use module
5530	2					15	elsif (/\G \b use ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [#\n]) /oxmsgc) { return "# use$1$2"; }
5531	0					0	elsif (/\G \b use ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "# use$1\n$2"; }
5532	0					0	elsif (/\G \b use ((?>\s+) (?:$ignore_modules)) \b /oxmsgc) { return "# use$1"; }
5533
5534							# ignore no module
5535	0					0	elsif (/\G \b no ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [#\n]) /oxmsgc) { return "# no$1$2"; }
5536	0					0	elsif (/\G \b no ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "# no$1\n$2"; }
5537	0					0	elsif (/\G \b no ((?>\s+) (?:$ignore_modules)) \b /oxmsgc) { return "# no$1"; }
5538
5539							# use else
5540	0					0	elsif (/\G \b use \b /oxmsgc) { return "use"; }
5541
5542							# use else
5543	2					6	elsif (/\G \b no \b /oxmsgc) { return "no"; }
5544
5545							# ''
5546							elsif (/\G (?
5547	1582					1983	my $q_string = '';
5548	1582					3401	while (not /\G \z/oxgc) {
5549	10513	100				39365	if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	4	100				9
		100
		50
5550	48					80	elsif (/\G (\\\') /oxgc) { $q_string .= $1; }
5551	1582					3273	elsif (/\G \' /oxgc) { return e_q('', "'","'",$q_string); }
5552	8879					15568	elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5553							}
5554	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5555							}
5556
5557							# ""
5558							elsif (/\G (\") /oxgc) {
5559	5374					6921	my $qq_string = '';
5560	5374					10504	while (not /\G \z/oxgc) {
5561	83640	100				238399	if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	109	100				198
		100
		50
5562	12					23	elsif (/\G (\\\") /oxgc) { $qq_string .= $1; }
5563	5374					9620	elsif (/\G \" /oxgc) { return e_qq('', '"','"',$qq_string); }
5564	78145					127128	elsif (/\G ($q_char) /oxgc) { $qq_string .= $1; }
5565							}
5566	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5567							}
5568
5569							# ``
5570							elsif (/\G (\`) /oxgc) {
5571	1					3	my $qx_string = '';
5572	1					3	while (not /\G \z/oxgc) {
5573	19	50				109	if (/\G (\\\\) /oxgc) { $qx_string .= $1; }
	0	50				0
		100
		50
5574	0					0	elsif (/\G (\\\`) /oxgc) { $qx_string .= $1; }
5575	1					3	elsif (/\G \` /oxgc) { return e_qq('', '`','`',$qx_string); }
5576	18					24	elsif (/\G ($q_char) /oxgc) { $qx_string .= $1; }
5577							}
5578	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5579							}
5580
5581							# // --- not divide operator (num / num), not defined-or
5582							elsif (($slash eq 'm//') and /\G (\/) /oxgc) {
5583	1068					1510	my $regexp = '';
5584	1068					2497	while (not /\G \z/oxgc) {
5585	10229	100				36771	if (/\G (\\\\) /oxgc) { $regexp .= $1; }
	1	50				3
		100
		50
5586	0					0	elsif (/\G (\\\/) /oxgc) { $regexp .= $1; }
5587	1068					2387	elsif (/\G \/ ([cgimosxpadlunbB]*) /oxgc) { return e_qr('', '/','/',$regexp,$1); }
5588	9160					15925	elsif (/\G ($q_char) /oxgc) { $regexp .= $1; }
5589							}
5590	0					0	die __FILE__, ": Search pattern not terminated\n";
5591							}
5592
5593							# ?? --- not conditional operator (condition ? then : else)
5594							elsif (($slash eq 'm//') and /\G (\?) /oxgc) {
5595	18					24	my $regexp = '';
5596	18					42	while (not /\G \z/oxgc) {
5597	82	50				404	if (/\G (\\\\) /oxgc) { $regexp .= $1; }
	0	50				0
		100
		50
5598	0					0	elsif (/\G (\\\?) /oxgc) { $regexp .= $1; }
5599	18					36	elsif (/\G \? ([cgimosxpadlunbB]*) /oxgc) { return e_qr('m','?','?',$regexp,$1); }
5600	64					121	elsif (/\G ($q_char) /oxgc) { $regexp .= $1; }
5601							}
5602	0					0	die __FILE__, ": Search pattern not terminated\n";
5603							}
5604
5605							# <<>> (a safer ARGV)
5606	0					0	elsif (/\G ( <<>> ) /oxgc) { $slash = 'm//'; return $1; }
	0					0
5607
5608							# << (bit shift) --- not here document
5609	0					0	elsif (/\G ( << (?>\s*) ) (?= [0-9\$\@\&] ) /oxgc) { $slash = 'm//'; return $1; }
	0					0
5610
5611							# <<'HEREDOC'
5612							elsif (/\G ( << '([a-zA-Z_0-9]*)' ) /oxgc) {
5613	80					98	$slash = 'm//';
5614	80					139	my $here_quote = $1;
5615	80					130	my $delimiter = $2;
5616
5617							# get here document
5618	80	100				146	if ($here_script eq '') {
5619	77					390	$here_script = CORE::substr $_, pos $_;
5620	77					409	$here_script =~ s/.*?\n//oxm;
5621							}
5622	80	50				663	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5623	80					243	push @heredoc, $1 . qq{\n$delimiter\n};
5624	80					124	push @heredoc_delimiter, $delimiter;
5625							}
5626							else {
5627	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5628							}
5629	80					311	return $here_quote;
5630							}
5631
5632							# <<\HEREDOC
5633
5634							# P.66 2.6.6. "Here" Documents
5635							# in Chapter 2: Bits and Pieces
5636							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
5637
5638							# P.73 "Here" Documents
5639							# in Chapter 2: Bits and Pieces
5640							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5641
5642							elsif (/\G ( << \\([a-zA-Z_0-9]+) ) /oxgc) {
5643	2					3	$slash = 'm//';
5644	2					3	my $here_quote = $1;
5645	2					3	my $delimiter = $2;
5646
5647							# get here document
5648	2	100				6	if ($here_script eq '') {
5649	1					13	$here_script = CORE::substr $_, pos $_;
5650	1					6	$here_script =~ s/.*?\n//oxm;
5651							}
5652	2	50				23	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5653	2					5	push @heredoc, $1 . qq{\n$delimiter\n};
5654	2					3	push @heredoc_delimiter, $delimiter;
5655							}
5656							else {
5657	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5658							}
5659	2					7	return $here_quote;
5660							}
5661
5662							# <<"HEREDOC"
5663							elsif (/\G ( << "([a-zA-Z_0-9]*)" ) /oxgc) {
5664	39					64	$slash = 'm//';
5665	39					68	my $here_quote = $1;
5666	39					52	my $delimiter = $2;
5667
5668							# get here document
5669	39	100				407	if ($here_script eq '') {
5670	38					180	$here_script = CORE::substr $_, pos $_;
5671	38					173	$here_script =~ s/.*?\n//oxm;
5672							}
5673	39	50				447	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5674	39					356	push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
5675	39					66	push @heredoc_delimiter, $delimiter;
5676							}
5677							else {
5678	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5679							}
5680	39					198	return $here_quote;
5681							}
5682
5683							# <
5684							elsif (/\G ( << ([a-zA-Z_0-9]+) ) /oxgc) {
5685	54					87	$slash = 'm//';
5686	54					113	my $here_quote = $1;
5687	54					76	my $delimiter = $2;
5688
5689							# get here document
5690	54	100				125	if ($here_script eq '') {
5691	51					256	$here_script = CORE::substr $_, pos $_;
5692	51					253	$here_script =~ s/.*?\n//oxm;
5693							}
5694	54	50				686	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5695	54					135	push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
5696	54					95	push @heredoc_delimiter, $delimiter;
5697							}
5698							else {
5699	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5700							}
5701	54					202	return $here_quote;
5702							}
5703
5704							# <<`HEREDOC`
5705							elsif (/\G ( << `([a-zA-Z_0-9]*)` ) /oxgc) {
5706	0					0	$slash = 'm//';
5707	0					0	my $here_quote = $1;
5708	0					0	my $delimiter = $2;
5709
5710							# get here document
5711	0	0				0	if ($here_script eq '') {
5712	0					0	$here_script = CORE::substr $_, pos $_;
5713	0					0	$here_script =~ s/.*?\n//oxm;
5714							}
5715	0	0				0	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5716	0					0	push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
5717	0					0	push @heredoc_delimiter, $delimiter;
5718							}
5719							else {
5720	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5721							}
5722	0					0	return $here_quote;
5723							}
5724
5725							# <<= <=> <= < operator
5726							elsif (/\G ( <<= \| <=> \| <= \| < ) (?= (?>\s) [A-Za-z_0-9'"`\$\@\&\\(\+\-] )/oxgc) {
5727	11					39	return $1;
5728							}
5729
5730							#
5731							elsif (/\G (<[\$]?[A-Za-z_][A-Za-z_0-9]*>) /oxgc) {
5732	0					0	return $1;
5733							}
5734
5735							# --- glob
5736
5737							# avoid "Error: Runtime exception" of perl version 5.005_03
5738
5739							elsif (/\G < ((?:[^\x80-\xFF>\0\a\e\f\n\r\t]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])+?) > /oxgc) {
5740	0					0	return 'Eutf2::glob("' . $1 . '")';
5741							}
5742
5743							# __DATA__
5744	0					0	elsif (/\G ^ ( __DATA__ \n .*) \z /oxmsgc) { return $1; }
5745
5746							# __END__
5747	302					1648	elsif (/\G ^ ( __END__ \n .*) \z /oxmsgc) { return $1; }
5748
5749							# \cD Control-D
5750
5751							# P.68 2.6.8. Other Literal Tokens
5752							# in Chapter 2: Bits and Pieces
5753							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
5754
5755							# P.76 Other Literal Tokens
5756							# in Chapter 2: Bits and Pieces
5757							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5758
5759	0					0	elsif (/\G ( \cD .*) \z /oxmsgc) { return $1; }
5760
5761							# \cZ Control-Z
5762	0					0	elsif (/\G ( \cZ .*) \z /oxmsgc) { return $1; }
5763
5764							# any operator before div
5765							elsif (/\G (
5766							-- \| \+\+ \|
5767							[\)\}\]]
5768
5769	8299					8826	) /oxgc) { $slash = 'div'; return $1; }
	8299					29959
5770
5771							# yada-yada or triple-dot operator
5772							elsif (/\G (
5773							\.\.\.
5774
5775	7					4	) /oxgc) { $slash = 'm//'; return q{die('Unimplemented')}; }
	7					22
5776
5777							# any operator before m//
5778
5779							# //, //= (defined-or)
5780
5781							# P.164 Logical Operators
5782							# in Chapter 10: More Control Structures
5783							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
5784
5785							# P.119 C-Style Logical (Short-Circuit) Operators
5786							# in Chapter 3: Unary and Binary Operators
5787							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5788
5789							# (and so on)
5790
5791							# ~~
5792
5793							# P.221 The Smart Match Operator
5794							# in Chapter 15: Smart Matching and given-when
5795							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
5796
5797							# P.112 Smartmatch Operator
5798							# in Chapter 3: Unary and Binary Operators
5799							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5800
5801							# (and so on)
5802
5803							elsif (/\G ((?>
5804
5805							!~~ \| !~ \| != \| ! \|
5806							%= \| % \|
5807							&&= \| && \| &= \| &\.= \| &\. \| & \|
5808							-= \| -> \| - \|
5809							:(?>\s*)= \|
5810							: \|
5811							<<>> \|
5812							<<= \| <=> \| <= \| < \|
5813							== \| => \| =~ \| = \|
5814							>>= \| >> \| >= \| > \|
5815							\\= \| \\ \| \= \| \ \|
5816							\+= \| \+ \|
5817							\.\. \| \.= \| \. \|
5818							\/\/= \| \/\/ \|
5819							\/= \| \/ \|
5820							\? \|
5821							\\ \|
5822							\^= \| \^\.= \| \^\. \| \^ \|
5823							\b x= \|
5824							\\|\\|= \| \\|\\| \| \\|= \| \\|\.= \| \\|\. \| \\| \|
5825							~~ \| ~\. \| ~ \|
5826							\b(?: and \| cmp \| eq \| ge \| gt \| le \| lt \| ne \| not \| or \| xor \| x )\b \|
5827							\b(?: print )\b \|
5828
5829							[,;\(\{\[]
5830
5831	15885					16607	)) /oxgc) { $slash = 'm//'; return $1; }
	15885					56800
5832
5833							# other any character
5834	23102					21811	elsif (/\G ($q_char) /oxgc) { $slash = 'div'; return $1; }
	23102					88321
5835
5836							# system error
5837							else {
5838	0					0	die __FILE__, ": Oops, this shouldn't happen!\n";
5839							}
5840							}
5841
5842							# escape UTF-8 string
5843							sub e_string {
5844	2444			2444	0	3768	my($string) = @_;
5845	2444					2184	my $e_string = '';
5846
5847	2444					2463	local $slash = 'm//';
5848
5849							# P.1024 Appendix W.10 Multibyte Processing
5850							# of ISBN 1-56592-224-7 CJKV Information Processing
5851							# (and so on)
5852
5853	2444					32605	my @char = $string =~ / \G (?>[^\x80-\xFF\\]\|\\$q_char\|$q_char) /oxmsg;
5854
5855							# without { ... }
5856	2444	100	66			9855	if (not (grep(/\A \{ \z/xms, @char) and grep(/\A \} \z/xms, @char))) {
5857	2405	50				4102	if ($string !~ /<
5858	2405					4668	return $string;
5859							}
5860							}
5861
5862							E_STRING_LOOP:
5863	39					95	while ($string !~ /\G \z/oxgc) {
5864	293	50				24310	if (0) {
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		50
		100
		50
5865							}
5866
5867							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> @{[Eutf2::PREMATCH()]}
5868	0					0	elsif ($string =~ /\G ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \b \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) /oxmsgc) {
5869	0					0	$e_string .= q{Eutf2::PREMATCH()};
5870	0					0	$slash = 'div';
5871							}
5872
5873							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> @{[Eutf2::MATCH()]}
5874							elsif ($string =~ /\G ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \b \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) /oxmsgc) {
5875	0					0	$e_string .= q{Eutf2::MATCH()};
5876	0					0	$slash = 'div';
5877							}
5878
5879							# $', ${'} --> $', ${'}
5880							elsif ($string =~ /\G ( \$' \| \$\{'\} ) /oxmsgc) {
5881	0					0	$e_string .= $1;
5882	0					0	$slash = 'div';
5883							}
5884
5885							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> @{[Eutf2::POSTMATCH()]}
5886							elsif ($string =~ /\G ( \$ (?>\s) POSTMATCH \b \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) /oxmsgc) {
5887	0					0	$e_string .= q{Eutf2::POSTMATCH()};
5888	0					0	$slash = 'div';
5889							}
5890
5891							# bareword
5892							elsif ($string =~ /\G ( \{ (?>\s) (?: tr \| index \| rindex \| reverse ) (?>\s) \} ) /oxmsgc) {
5893	0					0	$e_string .= $1;
5894	0					0	$slash = 'div';
5895							}
5896
5897							# $0 --> $0
5898							elsif ($string =~ /\G ( \$ 0 ) /oxmsgc) {
5899	0					0	$e_string .= $1;
5900	0					0	$slash = 'div';
5901							}
5902							elsif ($string =~ /\G ( \$ \{ (?>\s) 0 (?>\s) \} ) /oxmsgc) {
5903	0					0	$e_string .= $1;
5904	0					0	$slash = 'div';
5905							}
5906
5907							# $$ --> $$
5908							elsif ($string =~ /\G ( \$ \$ ) (?![\w\{]) /oxmsgc) {
5909	0					0	$e_string .= $1;
5910	0					0	$slash = 'div';
5911							}
5912
5913							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
5914							# $1, $2, $3 --> $1, $2, $3 otherwise
5915							elsif ($string =~ /\G \$ ((?>[1-9][0-9]*)) /oxmsgc) {
5916	0					0	$e_string .= e_capture($1);
5917	0					0	$slash = 'div';
5918							}
5919							elsif ($string =~ /\G \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} /oxmsgc) {
5920	0					0	$e_string .= e_capture($1);
5921	0					0	$slash = 'div';
5922							}
5923
5924							# $$foo[ ... ] --> $ $foo->[ ... ]
5925							elsif ($string =~ /\G \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ .+? \] ) /oxmsgc) {
5926	0					0	$e_string .= e_capture($1.'->'.$2);
5927	0					0	$slash = 'div';
5928							}
5929
5930							# $$foo{ ... } --> $ $foo->{ ... }
5931							elsif ($string =~ /\G \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ .+? \} ) /oxmsgc) {
5932	0					0	$e_string .= e_capture($1.'->'.$2);
5933	0					0	$slash = 'div';
5934							}
5935
5936							# $$foo
5937							elsif ($string =~ /\G \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) /oxmsgc) {
5938	0					0	$e_string .= e_capture($1);
5939	0					0	$slash = 'div';
5940							}
5941
5942							# ${ foo }
5943							elsif ($string =~ /\G \$ (?>\s) \{ ((?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* )) \} /oxmsgc) {
5944	0					0	$e_string .= '${' . $1 . '}';
5945	0					0	$slash = 'div';
5946							}
5947
5948							# ${ ... }
5949							elsif ($string =~ /\G \$ (?>\s) \{ (?>\s) ( $qq_brace ) (?>\s*) \} /oxmsgc) {
5950	3					10	$e_string .= e_capture($1);
5951	3					15	$slash = 'div';
5952							}
5953
5954							# variable or function
5955							# $ @ % & * $ #
5956							elsif ($string =~ /\G ( (?: [\$\@\%\&\] \| \$\# \| -> \| \b sub \b) (?>\s) (?: split \| chop \| index \| rindex \| lc \| uc \| fc \| chr \| ord \| reverse \| getc \| tr \| y \| q \| qq \| qx \| qw \| m \| s \| qr \| glob \| lstat \| opendir \| stat \| unlink \| chdir ) ) \b /oxmsgc) {
5957	1					3	$e_string .= $1;
5958	1					5	$slash = 'div';
5959							}
5960							# $ $ $ $ $ $ $ $ $ $ $ $ $ $
5961							# $ @ # \ ' " / ? ( ) [ ] < >
5962							elsif ($string =~ /\G ( \$[\$\@\#\\\'\"\/\?\[\]\<\>] ) /oxmsgc) {
5963	0					0	$e_string .= $1;
5964	0					0	$slash = 'div';
5965							}
5966
5967							# subroutines of package Eutf2
5968	0					0	elsif ($string =~ /\G \b (CORE:: \| ->(>?\s*) (?: atan2 \| [a-z]{2,})) \b /oxgc) { $e_string .= $1; $slash = 'm//'; }
	0					0
5969	0					0	elsif ($string =~ /\G \b Char::eval (?= (?>\s*) \{ ) /oxgc) { $e_string .= 'eval'; $slash = 'm//'; }
	0					0
5970	0					0	elsif ($string =~ /\G \b UTF2::eval (?= (?>\s*) \{ ) /oxgc) { $e_string .= 'eval'; $slash = 'm//'; }
	0					0
5971	0					0	elsif ($string =~ /\G \b Char::eval \b /oxgc) { $e_string .= 'eval Char::escape'; $slash = 'm//'; }
	0					0
5972	0					0	elsif ($string =~ /\G \b UTF2::eval \b /oxgc) { $e_string .= 'eval UTF2::escape'; $slash = 'm//'; }
	0					0
5973	0					0	elsif ($string =~ /\G \b bytes::substr \b /oxgc) { $e_string .= 'substr'; $slash = 'm//'; }
	0					0
5974	0					0	elsif ($string =~ /\G \b chop \b /oxgc) { $e_string .= 'Eutf2::chop'; $slash = 'm//'; }
	0					0
5975	0					0	elsif ($string =~ /\G \b bytes::index \b /oxgc) { $e_string .= 'index'; $slash = 'm//'; }
	0					0
5976	0					0	elsif ($string =~ /\G \b Char::index \b /oxgc) { $e_string .= 'Char::index'; $slash = 'm//'; }
	0					0
5977	0					0	elsif ($string =~ /\G \b UTF2::index \b /oxgc) { $e_string .= 'UTF2::index'; $slash = 'm//'; }
	0					0
5978	0					0	elsif ($string =~ /\G \b index \b /oxgc) { $e_string .= 'Eutf2::index'; $slash = 'm//'; }
	0					0
5979	0					0	elsif ($string =~ /\G \b bytes::rindex \b /oxgc) { $e_string .= 'rindex'; $slash = 'm//'; }
	0					0
5980	0					0	elsif ($string =~ /\G \b Char::rindex \b /oxgc) { $e_string .= 'Char::rindex'; $slash = 'm//'; }
	0					0
5981	0					0	elsif ($string =~ /\G \b UTF2::rindex \b /oxgc) { $e_string .= 'UTF2::rindex'; $slash = 'm//'; }
	0					0
5982	0					0	elsif ($string =~ /\G \b rindex \b /oxgc) { $e_string .= 'Eutf2::rindex'; $slash = 'm//'; }
	0					0
5983	0					0	elsif ($string =~ /\G \b lc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::lc'; $slash = 'm//'; }
	0					0
5984	0					0	elsif ($string =~ /\G \b lcfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::lcfirst'; $slash = 'm//'; }
	0					0
5985	0					0	elsif ($string =~ /\G \b uc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::uc'; $slash = 'm//'; }
	0					0
5986	0					0	elsif ($string =~ /\G \b ucfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::ucfirst'; $slash = 'm//'; }
	0					0
5987	0					0	elsif ($string =~ /\G \b fc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::fc'; $slash = 'm//'; }
	0					0
5988
5989							# "-s '' ..." means file test "-s 'filename' ..." (not means "- s/// ...")
5990	0					0	elsif ($string =~ /\G -s (?>\s*) (\") ((?:$qq_char)+?) (\") /oxgc) { $e_string .= '-s ' . e_qq('', $1,$3,$2); $slash = 'm//'; }
	0					0
5991	0					0	elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\#) ((?:$qq_char)+?) (\#) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0					0
5992	0					0	elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) ($) ((?:$qq_paren)+?) ($) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0					0
5993	0					0	elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\{) ((?:$qq_brace)+?) (\}) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0					0
5994	0					0	elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\[) ((?:$qq_bracket)+?) (\]) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0					0
5995	0					0	elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\<) ((?:$qq_angle)+?) (\>) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0					0
5996	0					0	elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\S) ((?:$qq_char)+?) (\1) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0					0
5997
5998	0					0	elsif ($string =~ /\G -s (?>\s*) (\') ((?:\\\'\|\\\\\|$q_char)+?) (\') /oxgc) { $e_string .= '-s ' . e_q ('', $1,$3,$2); $slash = 'm//'; }
	0					0
5999	0					0	elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\#) ((?:\\\#\|\\\\\|$q_char)+?) (\#) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0					0
6000	0					0	elsif ($string =~ /\G -s (?>\s+) q (?>\s*) ($) ((?:\\$\|\\\\\|$q_paren)+?) (\)) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0					0
6001	0					0	elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\{) ((?:\\\}\|\\\\\|$q_brace)+?) (\}) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0					0
6002	0					0	elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\[) ((?:\\\]\|\\\\\|$q_bracket)+?) (\]) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0					0
6003	0					0	elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\<) ((?:\\\>\|\\\\\|$q_angle)+?) (\>) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0					0
6004	0					0	elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\S) ((?:\\\1\|\\\\\|$q_char)+?) (\1) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0					0
6005
6006							elsif ($string =~ /\G -s (?>\s) (\$ (?> \w+ (?: ::\w+)) (?: (?: ->)? (?: [\$\@\%\&\]\ \| \$\#\* \| $ (?:$qq_paren)? $ \| [\@\%\]? \{ (?:$qq_brace)+? \} \| [\@\%]? \[ (?:$qq_bracket)+? \] ))*) /oxgc)
6007	0					0	{ $e_string .= "-s $1"; $slash = 'm//'; }
	0					0
6008	0					0	elsif ($string =~ /\G -s (?>\s) $ ((?:$qq_paren)?) $ /oxgc) { $e_string .= "-s ($1)"; $slash = 'm//'; }
	0					0
6009	0					0	elsif ($string =~ /\G -s (?= (?>\s+) [a-z]+) /oxgc) { $e_string .= '-s'; $slash = 'm//'; }
	0					0
6010	0					0	elsif ($string =~ /\G -s (?>\s+) ((?>\w+)) /oxgc) { $e_string .= "-s $1"; $slash = 'm//'; }
	0					0
6011
6012	0					0	elsif ($string =~ /\G \b bytes::length (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'length'; $slash = 'm//'; }
	0					0
6013	0					0	elsif ($string =~ /\G \b bytes::chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'chr'; $slash = 'm//'; }
	0					0
6014	0					0	elsif ($string =~ /\G \b chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::chr'; $slash = 'm//'; }
	0					0
6015	0					0	elsif ($string =~ /\G \b bytes::ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'ord'; $slash = 'div'; }
	0					0
6016	0					0	elsif ($string =~ /\G \b ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= $function_ord; $slash = 'div'; }
	0					0
6017	0					0	elsif ($string =~ /\G \b glob (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::glob'; $slash = 'm//'; }
	0					0
6018	0					0	elsif ($string =~ /\G \b lc \b /oxgc) { $e_string .= 'Eutf2::lc_'; $slash = 'm//'; }
	0					0
6019	0					0	elsif ($string =~ /\G \b lcfirst \b /oxgc) { $e_string .= 'Eutf2::lcfirst_'; $slash = 'm//'; }
	0					0
6020	0					0	elsif ($string =~ /\G \b uc \b /oxgc) { $e_string .= 'Eutf2::uc_'; $slash = 'm//'; }
	0					0
6021	0					0	elsif ($string =~ /\G \b ucfirst \b /oxgc) { $e_string .= 'Eutf2::ucfirst_'; $slash = 'm//'; }
	0					0
6022	0					0	elsif ($string =~ /\G \b fc \b /oxgc) { $e_string .= 'Eutf2::fc_'; $slash = 'm//'; }
	0					0
6023	0					0	elsif ($string =~ /\G -s \b /oxgc) { $e_string .= '-s '; $slash = 'm//'; }
	0					0
6024
6025	0					0	elsif ($string =~ /\G \b bytes::length \b /oxgc) { $e_string .= 'length'; $slash = 'm//'; }
	0					0
6026	0					0	elsif ($string =~ /\G \b bytes::chr \b /oxgc) { $e_string .= 'chr'; $slash = 'm//'; }
	0					0
6027	0					0	elsif ($string =~ /\G \b chr \b /oxgc) { $e_string .= 'Eutf2::chr_'; $slash = 'm//'; }
	0					0
6028	0					0	elsif ($string =~ /\G \b bytes::ord \b /oxgc) { $e_string .= 'ord'; $slash = 'div'; }
	0					0
6029	0					0	elsif ($string =~ /\G \b ord \b /oxgc) { $e_string .= $function_ord_; $slash = 'div'; }
	0					0
6030	0					0	elsif ($string =~ /\G \b glob \b /oxgc) { $e_string .= 'Eutf2::glob_'; $slash = 'm//'; }
	0					0
6031	0					0	elsif ($string =~ /\G \b reverse \b /oxgc) { $e_string .= $function_reverse; $slash = 'm//'; }
	0					0
6032	0					0	elsif ($string =~ /\G \b getc \b /oxgc) { $e_string .= $function_getc; $slash = 'm//'; }
	0					0
6033							# split
6034							elsif ($string =~ /\G \b (split) \b (?! (?>\s*) => ) /oxgc) {
6035	0					0	$slash = 'm//';
6036
6037	0					0	my $e = '';
6038	0					0	while ($string =~ /\G ( (?>\s+) \| \( \| \#.* ) /oxgc) {
6039	0					0	$e .= $1;
6040							}
6041
6042							# end of split
6043	0	0				0	if ($string =~ /\G (?= [,;\)\}\]] ) /oxgc) { return 'Eutf2::split' . $e; }
	0	0				0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
6044
6045							# split scalar value
6046	0					0	elsif ($string =~ /\G ( [\$\@\&\*] $qq_scalar ) /oxgc) { $e_string .= 'Eutf2::split' . $e . e_string($1); next E_STRING_LOOP; }
	0					0
6047
6048							# split literal space
6049	0					0	elsif ($string =~ /\G \b qq (\#) [ ] (\#) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {qq$1 $2}; next E_STRING_LOOP; }
	0					0
6050	0					0	elsif ($string =~ /\G \b qq ((?>\s*)) ($) [ ] ($) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0					0
6051	0					0	elsif ($string =~ /\G \b qq ((?>\s*)) (\{) [ ] (\}) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0					0
6052	0					0	elsif ($string =~ /\G \b qq ((?>\s*)) (\[) [ ] (\]) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0					0
6053	0					0	elsif ($string =~ /\G \b qq ((?>\s*)) (\<) [ ] (\>) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0					0
6054	0					0	elsif ($string =~ /\G \b qq ((?>\s*)) (\S) [ ] (\2) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0					0
6055	0					0	elsif ($string =~ /\G \b q (\#) [ ] (\#) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {q$1 $2}; next E_STRING_LOOP; }
	0					0
6056	0					0	elsif ($string =~ /\G \b q ((?>\s*)) ($) [ ] ($) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0					0
6057	0					0	elsif ($string =~ /\G \b q ((?>\s*)) (\{) [ ] (\}) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0					0
6058	0					0	elsif ($string =~ /\G \b q ((?>\s*)) (\[) [ ] (\]) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0					0
6059	0					0	elsif ($string =~ /\G \b q ((?>\s*)) (\<) [ ] (\>) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0					0
6060	0					0	elsif ($string =~ /\G \b q ((?>\s*)) (\S) [ ] (\2) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0					0
6061	0					0	elsif ($string =~ /\G ' [ ] ' /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {' '}; next E_STRING_LOOP; }
	0					0
6062	0					0	elsif ($string =~ /\G " [ ] " /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {" "}; next E_STRING_LOOP; }
	0					0
6063
6064							# split qq//
6065							elsif ($string =~ /\G \b (qq) \b /oxgc) {
6066	0	0				0	if ($string =~ /\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq# # --> qr # #
	0					0
	0					0
6067							else {
6068	0					0	while ($string !~ /\G \z/oxgc) {
6069	0	0				0	if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0				0
		0
		0
		0
		0
		0
6070	0					0	elsif ($string =~ /\G ($) ((?:$qq_paren)*?) ($) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq ( ) --> qr ( )
	0					0
6071	0					0	elsif ($string =~ /\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq { } --> qr { }
	0					0
6072	0					0	elsif ($string =~ /\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq [ ] --> qr [ ]
	0					0
6073	0					0	elsif ($string =~ /\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq < > --> qr < >
	0					0
6074	0					0	elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= e_split($e.'qr','{','}',$2,''); next E_STRING_LOOP; } # qq \| \| --> qr { }
	0					0
6075	0					0	elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq * --> qr * *
	0					0
6076							}
6077	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6078							}
6079							}
6080
6081							# split qr//
6082							elsif ($string =~ /\G \b (qr) \b /oxgc) {
6083	0	0				0	if ($string =~ /\G (\#) ((?:$qq_char)?) (\#) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1,$3,$2,$4); next E_STRING_LOOP; } # qr# #
	0					0
	0					0
6084							else {
6085	0					0	while ($string !~ /\G \z/oxgc) {
6086	0	0				0	if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0				0
		0
		0
		0
		0
		0
		0
6087	0					0	elsif ($string =~ /\G ($) ((?:$qq_paren)?) ($) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr ( )
	0					0
6088	0					0	elsif ($string =~ /\G (\{) ((?:$qq_brace)?) (\}) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr { }
	0					0
6089	0					0	elsif ($string =~ /\G (\[) ((?:$qq_bracket)?) (\]) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr [ ]
	0					0
6090	0					0	elsif ($string =~ /\G (\<) ((?:$qq_angle)?) (\>) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr < >
	0					0
6091	0					0	elsif ($string =~ /\G (\') ((?:$qq_char)?) (\') ([imosxpadlunbB]) /oxgc) { $e_string .= e_split_q($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr ' '
	0					0
6092	0					0	elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([imosxpadlunbB]*) /oxgc) { $e_string .= e_split ($e.'qr','{','}',$2,$4); next E_STRING_LOOP; } # qr \| \| --> qr { }
	0					0
6093	0					0	elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr * *
	0					0
6094							}
6095	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6096							}
6097							}
6098
6099							# split q//
6100							elsif ($string =~ /\G \b (q) \b /oxgc) {
6101	0	0				0	if ($string =~ /\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q# # --> qr # #
	0					0
	0					0
6102							else {
6103	0					0	while ($string !~ /\G \z/oxgc) {
6104	0	0				0	if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0				0
		0
		0
		0
		0
		0
6105	0					0	elsif ($string =~ /\G ($) ((?:\\\\\|\\$\|\\$\|$q_paren)*?) ($) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q ( ) --> qr ( )
	0					0
6106	0					0	elsif ($string =~ /\G (\{) ((?:\\\\\|\\\}\|\\\{\|$q_brace)*?) (\}) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q { } --> qr { }
	0					0
6107	0					0	elsif ($string =~ /\G (\[) ((?:\\\\\|\\\]\|\\\[\|$q_bracket)*?) (\]) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q [ ] --> qr [ ]
	0					0
6108	0					0	elsif ($string =~ /\G (\<) ((?:\\\\\|\\\>\|\\\<\|$q_angle)*?) (\>) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q < > --> qr < >
	0					0
6109	0					0	elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$q_char)?) (\1) /oxgc) { $e_string .= e_split_q($e.'qr','{','}',$2,''); next E_STRING_LOOP; } # q \| \| --> qr { }
	0					0
6110	0					0	elsif ($string =~ /\G (\S) ((?:\\\\\|\\\1\| $q_char)?) (\1) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q * --> qr * *
	0					0
6111							}
6112	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6113							}
6114							}
6115
6116							# split m//
6117							elsif ($string =~ /\G \b (m) \b /oxgc) {
6118	0	0				0	if ($string =~ /\G (\#) ((?:$qq_char)?) (\#) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1,$3,$2,$4); next E_STRING_LOOP; } # m# # --> qr # #
	0					0
	0					0
6119							else {
6120	0					0	while ($string !~ /\G \z/oxgc) {
6121	0	0				0	if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0				0
		0
		0
		0
		0
		0
		0
6122	0					0	elsif ($string =~ /\G ($) ((?:$qq_paren)?) ($) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m ( ) --> qr ( )
	0					0
6123	0					0	elsif ($string =~ /\G (\{) ((?:$qq_brace)?) (\}) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m { } --> qr { }
	0					0
6124	0					0	elsif ($string =~ /\G (\[) ((?:$qq_bracket)?) (\]) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m [ ] --> qr [ ]
	0					0
6125	0					0	elsif ($string =~ /\G (\<) ((?:$qq_angle)?) (\>) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m < > --> qr < >
	0					0
6126	0					0	elsif ($string =~ /\G (\') ((?:$qq_char)?) (\') ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split_q($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m ' ' --> qr ' '
	0					0
6127	0					0	elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]*) /oxgc) { $e_string .= e_split ($e.'qr','{','}',$2,$4); next E_STRING_LOOP; } # m \| \| --> qr { }
	0					0
6128	0					0	elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m * * --> qr * *
	0					0
6129							}
6130	0					0	die __FILE__, ": Search pattern not terminated\n";
6131							}
6132							}
6133
6134							# split ''
6135							elsif ($string =~ /\G (\') /oxgc) {
6136	0					0	my $q_string = '';
6137	0					0	while ($string !~ /\G \z/oxgc) {
6138	0	0				0	if ($string =~ /\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0				0
		0
		0
6139	0					0	elsif ($string =~ /\G (\\\') /oxgc) { $q_string .= $1; } # splitqr'' --> split qr''
6140	0					0	elsif ($string =~ /\G \' /oxgc) { $e_string .= e_split_q($e.q{ qr},"'","'",$q_string,''); next E_STRING_LOOP; } # ' ' --> qr ' '
	0					0
6141	0					0	elsif ($string =~ /\G ($q_char) /oxgc) { $q_string .= $1; }
6142							}
6143	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6144							}
6145
6146							# split ""
6147							elsif ($string =~ /\G (\") /oxgc) {
6148	0					0	my $qq_string = '';
6149	0					0	while ($string !~ /\G \z/oxgc) {
6150	0	0				0	if ($string =~ /\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0				0
		0
		0
6151	0					0	elsif ($string =~ /\G (\\\") /oxgc) { $qq_string .= $1; } # splitqr"" --> split qr""
6152	0					0	elsif ($string =~ /\G \" /oxgc) { $e_string .= e_split($e.q{ qr},'"','"',$qq_string,''); next E_STRING_LOOP; } # " " --> qr " "
	0					0
6153	0					0	elsif ($string =~ /\G ($q_char) /oxgc) { $qq_string .= $1; }
6154							}
6155	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6156							}
6157
6158							# split //
6159							elsif ($string =~ /\G (\/) /oxgc) {
6160	0					0	my $regexp = '';
6161	0					0	while ($string !~ /\G \z/oxgc) {
6162	0	0				0	if ($string =~ /\G (\\\\) /oxgc) { $regexp .= $1; }
	0	0				0
		0
		0
6163	0					0	elsif ($string =~ /\G (\\\/) /oxgc) { $regexp .= $1; } # splitqr// --> split qr//
6164	0					0	elsif ($string =~ /\G \/ ([cgimosxpadlunbB]*) /oxgc) { $e_string .= e_split($e.q{ qr}, '/','/',$regexp,$1); next E_STRING_LOOP; } # / / --> qr / /
	0					0
6165	0					0	elsif ($string =~ /\G ($q_char) /oxgc) { $regexp .= $1; }
6166							}
6167	0					0	die __FILE__, ": Search pattern not terminated\n";
6168							}
6169							}
6170
6171							# qq//
6172							elsif ($string =~ /\G \b (qq) \b /oxgc) {
6173	0					0	my $ope = $1;
6174	0	0				0	if ($string =~ /\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { # qq# #
6175	0					0	$e_string .= e_qq($ope,$1,$3,$2);
6176							}
6177							else {
6178	0					0	my $e = '';
6179	0					0	while ($string !~ /\G \z/oxgc) {
6180	0	0				0	if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
6181	0					0	elsif ($string =~ /\G ($) ((?:$qq_paren)*?) ($) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq ( )
	0					0
6182	0					0	elsif ($string =~ /\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq { }
	0					0
6183	0					0	elsif ($string =~ /\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq [ ]
	0					0
6184	0					0	elsif ($string =~ /\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq < >
	0					0
6185	0					0	elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq *
	0					0
6186							}
6187	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6188							}
6189							}
6190
6191							# qx//
6192							elsif ($string =~ /\G \b (qx) \b /oxgc) {
6193	0					0	my $ope = $1;
6194	0	0				0	if ($string =~ /\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { # qx# #
6195	0					0	$e_string .= e_qq($ope,$1,$3,$2);
6196							}
6197							else {
6198	0					0	my $e = '';
6199	0					0	while ($string !~ /\G \z/oxgc) {
6200	0	0				0	if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
		0
6201	0					0	elsif ($string =~ /\G ($) ((?:$qq_paren)*?) ($) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx ( )
	0					0
6202	0					0	elsif ($string =~ /\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx { }
	0					0
6203	0					0	elsif ($string =~ /\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx [ ]
	0					0
6204	0					0	elsif ($string =~ /\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx < >
	0					0
6205	0					0	elsif ($string =~ /\G (\') ((?:$qq_char)*?) (\') /oxgc) { $e_string .= $e . e_q ($ope,$1,$3,$2); next E_STRING_LOOP; } # qx ' '
	0					0
6206	0					0	elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx *
	0					0
6207							}
6208	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6209							}
6210							}
6211
6212							# q//
6213							elsif ($string =~ /\G \b (q) \b /oxgc) {
6214	0					0	my $ope = $1;
6215	0	0				0	if ($string =~ /\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { # q# #
6216	0					0	$e_string .= e_q($ope,$1,$3,$2);
6217							}
6218							else {
6219	0					0	my $e = '';
6220	0					0	while ($string !~ /\G \z/oxgc) {
6221	0	0				0	if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0				0
		0
		0
		0
		0
6222	0					0	elsif ($string =~ /\G ($) ((?:\\\\\|\\$\|\\$\|$q_paren)*?) ($) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q ( )
	0					0
6223	0					0	elsif ($string =~ /\G (\{) ((?:\\\\\|\\\}\|\\\{\|$q_brace)*?) (\}) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q { }
	0					0
6224	0					0	elsif ($string =~ /\G (\[) ((?:\\\\\|\\\]\|\\\[\|$q_bracket)*?) (\]) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q [ ]
	0					0
6225	0					0	elsif ($string =~ /\G (\<) ((?:\\\\\|\\\>\|\\\<\|$q_angle)*?) (\>) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q < >
	0					0
6226	0					0	elsif ($string =~ /\G (\S) ((?:\\\\\|\\\1\| $q_char)?) (\1) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q *
	0					0
6227							}
6228	0					0	die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6229							}
6230							}
6231
6232							# ''
6233	12					32	elsif ($string =~ /\G (?
6234
6235							# ""
6236	6					19	elsif ($string =~ /\G (\") ((?:$qq_char)*?) (\") /oxgc) { $e_string .= e_qq('',$1,$3,$2); }
6237
6238							# ``
6239	0					0	elsif ($string =~ /\G (\`) ((?:$qq_char)*?) (\`) /oxgc) { $e_string .= e_qq('',$1,$3,$2); }
6240
6241							# <<>> (a safer ARGV)
6242	0					0	elsif ($string =~ /\G ( <<>> ) /oxgc) { $e_string .= $1; }
6243
6244							# <<= <=> <= < operator
6245	0					0	elsif ($string =~ /\G ( <<= \| <=> \| <= \| < ) (?= (?>\s) [A-Za-z_0-9'"`\$\@\&\\(\+\-] )/oxgc) { $e_string .= $1; }
6246
6247							#
6248	0					0	elsif ($string =~ /\G (<[\$]?[A-Za-z_][A-Za-z_0-9]*>) /oxgc) { $e_string .= $1; }
6249
6250							# --- glob
6251							elsif ($string =~ /\G < ((?:$q_char)+?) > /oxgc) {
6252	0					0	$e_string .= 'Eutf2::glob("' . $1 . '")';
6253							}
6254
6255							# << (bit shift) --- not here document
6256	0					0	elsif ($string =~ /\G ( << (?>\s*) ) (?= [0-9\$\@\&] ) /oxgc) { $slash = 'm//'; $e_string .= $1; }
	0					0
6257
6258							# <<'HEREDOC'
6259							elsif ($string =~ /\G ( << '([a-zA-Z_0-9]*)' ) /oxgc) {
6260	0					0	$slash = 'm//';
6261	0					0	my $here_quote = $1;
6262	0					0	my $delimiter = $2;
6263
6264							# get here document
6265	0	0				0	if ($here_script eq '') {
6266	0					0	$here_script = CORE::substr $_, pos $_;
6267	0					0	$here_script =~ s/.*?\n//oxm;
6268							}
6269	0	0				0	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6270	0					0	push @heredoc, $1 . qq{\n$delimiter\n};
6271	0					0	push @heredoc_delimiter, $delimiter;
6272							}
6273							else {
6274	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6275							}
6276	0					0	$e_string .= $here_quote;
6277							}
6278
6279							# <<\HEREDOC
6280							elsif ($string =~ /\G ( << \\([a-zA-Z_0-9]+) ) /oxgc) {
6281	0					0	$slash = 'm//';
6282	0					0	my $here_quote = $1;
6283	0					0	my $delimiter = $2;
6284
6285							# get here document
6286	0	0				0	if ($here_script eq '') {
6287	0					0	$here_script = CORE::substr $_, pos $_;
6288	0					0	$here_script =~ s/.*?\n//oxm;
6289							}
6290	0	0				0	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6291	0					0	push @heredoc, $1 . qq{\n$delimiter\n};
6292	0					0	push @heredoc_delimiter, $delimiter;
6293							}
6294							else {
6295	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6296							}
6297	0					0	$e_string .= $here_quote;
6298							}
6299
6300							# <<"HEREDOC"
6301							elsif ($string =~ /\G ( << "([a-zA-Z_0-9]*)" ) /oxgc) {
6302	0					0	$slash = 'm//';
6303	0					0	my $here_quote = $1;
6304	0					0	my $delimiter = $2;
6305
6306							# get here document
6307	0	0				0	if ($here_script eq '') {
6308	0					0	$here_script = CORE::substr $_, pos $_;
6309	0					0	$here_script =~ s/.*?\n//oxm;
6310							}
6311	0	0				0	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6312	0					0	push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
6313	0					0	push @heredoc_delimiter, $delimiter;
6314							}
6315							else {
6316	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6317							}
6318	0					0	$e_string .= $here_quote;
6319							}
6320
6321							# <
6322							elsif ($string =~ /\G ( << ([a-zA-Z_0-9]+) ) /oxgc) {
6323	0					0	$slash = 'm//';
6324	0					0	my $here_quote = $1;
6325	0					0	my $delimiter = $2;
6326
6327							# get here document
6328	0	0				0	if ($here_script eq '') {
6329	0					0	$here_script = CORE::substr $_, pos $_;
6330	0					0	$here_script =~ s/.*?\n//oxm;
6331							}
6332	0	0				0	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6333	0					0	push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
6334	0					0	push @heredoc_delimiter, $delimiter;
6335							}
6336							else {
6337	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6338							}
6339	0					0	$e_string .= $here_quote;
6340							}
6341
6342							# <<`HEREDOC`
6343							elsif ($string =~ /\G ( << `([a-zA-Z_0-9]*)` ) /oxgc) {
6344	0					0	$slash = 'm//';
6345	0					0	my $here_quote = $1;
6346	0					0	my $delimiter = $2;
6347
6348							# get here document
6349	0	0				0	if ($here_script eq '') {
6350	0					0	$here_script = CORE::substr $_, pos $_;
6351	0					0	$here_script =~ s/.*?\n//oxm;
6352							}
6353	0	0				0	if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6354	0					0	push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
6355	0					0	push @heredoc_delimiter, $delimiter;
6356							}
6357							else {
6358	0					0	die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6359							}
6360	0					0	$e_string .= $here_quote;
6361							}
6362
6363							# any operator before div
6364							elsif ($string =~ /\G (
6365							-- \| \+\+ \|
6366							[\)\}\]]
6367
6368	40					45	) /oxgc) { $slash = 'div'; $e_string .= $1; }
	40					106
6369
6370							# yada-yada or triple-dot operator
6371							elsif ($string =~ /\G (
6372							\.\.\.
6373
6374	0					0	) /oxgc) { $slash = 'm//'; $e_string .= q{die('Unimplemented')}; }
	0					0
6375
6376							# any operator before m//
6377							elsif ($string =~ /\G ((?>
6378
6379							!~~ \| !~ \| != \| ! \|
6380							%= \| % \|
6381							&&= \| && \| &= \| &\.= \| &\. \| & \|
6382							-= \| -> \| - \|
6383							:(?>\s*)= \|
6384							: \|
6385							<<>> \|
6386							<<= \| <=> \| <= \| < \|
6387							== \| => \| =~ \| = \|
6388							>>= \| >> \| >= \| > \|
6389							\\= \| \\ \| \= \| \ \|
6390							\+= \| \+ \|
6391							\.\. \| \.= \| \. \|
6392							\/\/= \| \/\/ \|
6393							\/= \| \/ \|
6394							\? \|
6395							\\ \|
6396							\^= \| \^\.= \| \^\. \| \^ \|
6397							\b x= \|
6398							\\|\\|= \| \\|\\| \| \\|= \| \\|\.= \| \\|\. \| \\| \|
6399							~~ \| ~\. \| ~ \|
6400							\b(?: and \| cmp \| eq \| ge \| gt \| le \| lt \| ne \| not \| or \| xor \| x )\b \|
6401							\b(?: print )\b \|
6402
6403							[,;\(\{\[]
6404
6405	50					66	)) /oxgc) { $slash = 'm//'; $e_string .= $1; }
	50					171
6406
6407							# other any character
6408	181					682	elsif ($string =~ /\G ($q_char) /oxgc) { $e_string .= $1; }
6409
6410							# system error
6411							else {
6412	0					0	die __FILE__, ": Oops, this shouldn't happen!\n";
6413							}
6414							}
6415
6416	39					146	return $e_string;
6417							}
6418
6419							#
6420							# character class
6421							#
6422							sub character_class {
6423	2930			2930	0	3050	my($char,$modifier) = @_;
6424
6425	2930	100				3395	if ($char eq '.') {
6426	115	100				189	if ($modifier =~ /s/) {
6427	23					51	return '${Eutf2::dot_s}';
6428							}
6429							else {
6430	92					159	return '${Eutf2::dot}';
6431							}
6432							}
6433							else {
6434	2815					3592	return Eutf2::classic_character_class($char);
6435							}
6436							}
6437
6438							#
6439							# escape capture ($1, $2, $3, ...)
6440							#
6441							sub e_capture {
6442
6443	469			469	0	1747	return join '', '${', $_[0], '}';
6444							}
6445
6446							#
6447							# escape transliteration (tr/// or y///)
6448							#
6449							sub e_tr {
6450	11			11	0	22	my($variable,$charclass,$e,$charclass2,$modifier) = @_;
6451	11					12	my $e_tr = '';
6452	11		100			23	$modifier \|\|= '';
6453
6454	11					12	$slash = 'div';
6455
6456							# quote character class 1
6457	11					14	$charclass = q_tr($charclass);
6458
6459							# quote character class 2
6460	11					16	$charclass2 = q_tr($charclass2);
6461
6462							# /b /B modifier
6463	11	50				22	if ($modifier =~ tr/bB//d) {
6464	0	0				0	if ($variable eq '') {
6465	0					0	$e_tr = qq{tr$charclass$e$charclass2$modifier};
6466							}
6467							else {
6468	0					0	$e_tr = qq{$variable${bind_operator}tr$charclass$e$charclass2$modifier};
6469							}
6470							}
6471							else {
6472	11	100				19	if ($variable eq '') {
6473	2					6	$e_tr = qq{Eutf2::tr(\$_,' =~ ',$charclass,$e$charclass2,'$modifier')};
6474							}
6475							else {
6476	9					23	$e_tr = qq{Eutf2::tr($variable,'$bind_operator',$charclass,$e$charclass2,'$modifier')};
6477							}
6478							}
6479
6480							# clear tr/// variable
6481	11					11	$tr_variable = '';
6482	11					7	$bind_operator = '';
6483
6484	11					79	return $e_tr;
6485							}
6486
6487							#
6488							# quote for escape transliteration (tr/// or y///)
6489							#
6490							sub q_tr {
6491	22			22	0	19	my($charclass) = @_;
6492
6493							# quote character class
6494	22	50				42	if ($charclass !~ /'/oxms) {
		0
		0
		0
		0
		0
6495	22					26	return e_q('', "'", "'", $charclass); # --> q' '
6496							}
6497							elsif ($charclass !~ /\//oxms) {
6498	0					0	return e_q('q', '/', '/', $charclass); # --> q/ /
6499							}
6500							elsif ($charclass !~ /\#/oxms) {
6501	0					0	return e_q('q', '#', '#', $charclass); # --> q# #
6502							}
6503							elsif ($charclass !~ /[\<\>]/oxms) {
6504	0					0	return e_q('q', '<', '>', $charclass); # --> q< >
6505							}
6506							elsif ($charclass !~ /[]/oxms) {
6507	0					0	return e_q('q', '(', ')', $charclass); # --> q( )
6508							}
6509							elsif ($charclass !~ /[\{\}]/oxms) {
6510	0					0	return e_q('q', '{', '}', $charclass); # --> q{ }
6511							}
6512							else {
6513	0					0	for my $char (qw( ! " $ % & * + . : = ? @ ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
6514	0	0				0	if ($charclass !~ /\Q$char\E/xms) {
6515	0					0	return e_q('q', $char, $char, $charclass);
6516							}
6517							}
6518							}
6519
6520	0					0	return e_q('q', '{', '}', $charclass);
6521							}
6522
6523							#
6524							# escape q string (q//, '')
6525							#
6526							sub e_q {
6527	1978			1978	0	3234	my($ope,$delimiter,$end_delimiter,$string) = @_;
6528
6529	1978					1915	$slash = 'div';
6530
6531	1978					8686	return join '', $ope, $delimiter, $string, $end_delimiter;
6532							}
6533
6534							#
6535							# escape qq string (qq//, "", qx//, ``)
6536							#
6537							sub e_qq {
6538	9270			9270	0	13340	my($ope,$delimiter,$end_delimiter,$string) = @_;
6539
6540	9270					8101	$slash = 'div';
6541
6542	9270					7393	my $left_e = 0;
6543	9270					6186	my $right_e = 0;
6544
6545							# split regexp
6546	9270					309290	my @char = $string =~ /\G((?>
6547							[^\x80-\xFF\\\$]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
6548							\\x\{ (?>[0-9A-Fa-f]+) \} \|
6549							\\o\{ (?>[0-7]+) \} \|
6550							\\N\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
6551							\\ $q_char \|
6552							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
6553							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
6554							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
6555							\$ (?>\s* [0-9]+) \|
6556							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
6557							\$ \$ (?![\w\{]) \|
6558							\$ (?>\s) \$ (?>\s) $qq_variable \|
6559							$q_char
6560							))/oxmsg;
6561
6562	9270					30150	for (my $i=0; $i <= $#char; $i++) {
6563
6564							# "\L\u" --> "\u\L"
6565	221891	50	66			777260	if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		50	33
		100
		100
		50
6566	0					0	@char[$i,$i+1] = @char[$i+1,$i];
6567							}
6568
6569							# "\U\l" --> "\l\U"
6570							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
6571	0					0	@char[$i,$i+1] = @char[$i+1,$i];
6572							}
6573
6574							# octal escape sequence
6575							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
6576	1					3	$char[$i] = Eutf2::octchr($1);
6577							}
6578
6579							# hexadecimal escape sequence
6580							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
6581	1					3	$char[$i] = Eutf2::hexchr($1);
6582							}
6583
6584							# \N{CHARNAME} --> N{CHARNAME}
6585							elsif ($char[$i] =~ /\A \\ ( N\{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
6586	0					0	$char[$i] = $1;
6587							}
6588
6589	221891	100				2107815	if (0) {
		50
		50
		50
		100
		100
		50
		100
		50
		50
		100
		50
		50
		100
		50
		50
		50
		50
		100
		100
		100
		50
		50
6590							}
6591
6592							# \F
6593							#
6594							# P.69 Table 2-6. Translation escapes
6595							# in Chapter 2: Bits and Pieces
6596							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
6597							# (and so on)
6598
6599							# \u \l \U \L \F \Q \E
6600	0					0	elsif ($char[$i] =~ /\A ([<>]) \z/oxms) {
6601	602	50				1288	if ($right_e < $left_e) {
6602	0					0	$char[$i] = '\\' . $char[$i];
6603							}
6604							}
6605							elsif ($char[$i] eq '\u') {
6606
6607							# "STRING @{[ LIST EXPR ]} MORE STRING"
6608
6609							# P.257 Other Tricks You Can Do with Hard References
6610							# in Chapter 8: References
6611							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
6612
6613							# P.353 Other Tricks You Can Do with Hard References
6614							# in Chapter 8: References
6615							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
6616
6617							# (and so on)
6618
6619	0					0	$char[$i] = '@{[Eutf2::ucfirst qq<';
6620	0					0	$left_e++;
6621							}
6622							elsif ($char[$i] eq '\l') {
6623	0					0	$char[$i] = '@{[Eutf2::lcfirst qq<';
6624	0					0	$left_e++;
6625							}
6626							elsif ($char[$i] eq '\U') {
6627	0					0	$char[$i] = '@{[Eutf2::uc qq<';
6628	0					0	$left_e++;
6629							}
6630							elsif ($char[$i] eq '\L') {
6631	6					7	$char[$i] = '@{[Eutf2::lc qq<';
6632	6					9	$left_e++;
6633							}
6634							elsif ($char[$i] eq '\F') {
6635	23					20	$char[$i] = '@{[Eutf2::fc qq<';
6636	23					38	$left_e++;
6637							}
6638							elsif ($char[$i] eq '\Q') {
6639	0					0	$char[$i] = '@{[CORE::quotemeta qq<';
6640	0					0	$left_e++;
6641							}
6642							elsif ($char[$i] eq '\E') {
6643	26	50				34	if ($right_e < $left_e) {
6644	26					16	$char[$i] = '>]}';
6645	26					43	$right_e++;
6646							}
6647							else {
6648	0					0	$char[$i] = '';
6649							}
6650							}
6651							elsif ($char[$i] eq '\Q') {
6652	0					0	while (1) {
6653	0	0				0	if (++$i > $#char) {
6654	0					0	last;
6655							}
6656	0	0				0	if ($char[$i] eq '\E') {
6657	0					0	last;
6658							}
6659							}
6660							}
6661							elsif ($char[$i] eq '\E') {
6662							}
6663
6664							# $0 --> $0
6665							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
6666							}
6667							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
6668							}
6669
6670							# $$ --> $$
6671							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
6672							}
6673
6674							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
6675							# $1, $2, $3 --> $1, $2, $3 otherwise
6676							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
6677	409					688	$char[$i] = e_capture($1);
6678							}
6679							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
6680	0					0	$char[$i] = e_capture($1);
6681							}
6682
6683							# $$foo[ ... ] --> $ $foo->[ ... ]
6684							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
6685	0					0	$char[$i] = e_capture($1.'->'.$2);
6686							}
6687
6688							# $$foo{ ... } --> $ $foo->{ ... }
6689							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
6690	0					0	$char[$i] = e_capture($1.'->'.$2);
6691							}
6692
6693							# $$foo
6694							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
6695	0					0	$char[$i] = e_capture($1);
6696							}
6697
6698							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
6699							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
6700	44					95	$char[$i] = '@{[Eutf2::PREMATCH()]}';
6701							}
6702
6703							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
6704							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
6705	45					108	$char[$i] = '@{[Eutf2::MATCH()]}';
6706							}
6707
6708							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
6709							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
6710	33					69	$char[$i] = '@{[Eutf2::POSTMATCH()]}';
6711							}
6712
6713							# ${ foo } --> ${ foo }
6714							elsif ($char[$i] =~ /\A \$ (?>\s) \{ (?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* ) \} \z/oxms) {
6715							}
6716
6717							# ${ ... }
6718							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
6719	0					0	$char[$i] = e_capture($1);
6720							}
6721							}
6722
6723							# return string
6724	9270	100				13337	if ($left_e > $right_e) {
6725	3					18	return join '', $ope, $delimiter, @char, '>]}' x ($left_e - $right_e), $end_delimiter;
6726							}
6727	9267					63755	return join '', $ope, $delimiter, @char, $end_delimiter;
6728							}
6729
6730							#
6731							# escape qw string (qw//)
6732							#
6733							sub e_qw {
6734	34			34	0	142	my($ope,$delimiter,$end_delimiter,$string) = @_;
6735
6736	34					41	$slash = 'div';
6737
6738							# choice again delimiter
6739	34					345	my %octet = map {$_ => 1} ($string =~ /\G ([\x00-\xFF]) /oxmsg);
	856					883
6740	34	50				170	if (not $octet{$end_delimiter}) {
		0
		0
		0
		0
6741	34					223	return join '', $ope, $delimiter, $string, $end_delimiter;
6742							}
6743							elsif (not $octet{')'}) {
6744	0					0	return join '', $ope, '(', $string, ')';
6745							}
6746							elsif (not $octet{'}'}) {
6747	0					0	return join '', $ope, '{', $string, '}';
6748							}
6749							elsif (not $octet{']'}) {
6750	0					0	return join '', $ope, '[', $string, ']';
6751							}
6752							elsif (not $octet{'>'}) {
6753	0					0	return join '', $ope, '<', $string, '>';
6754							}
6755							else {
6756	0					0	for my $char (qw( ! " $ % & * + - . / : = ? @ ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
6757	0	0				0	if (not $octet{$char}) {
6758	0					0	return join '', $ope, $char, $string, $char;
6759							}
6760							}
6761							}
6762
6763							# qw/AAA BBB C'CC/ --> ('AAA', 'BBB', 'C\'CC')
6764	0					0	my @string = CORE::split(/\s+/, $string);
6765	0					0	for my $string (@string) {
6766	0					0	my @octet = $string =~ /\G ([\x00-\xFF]) /oxmsg;
6767	0					0	for my $octet (@octet) {
6768	0	0				0	if ($octet =~ /\A (['\\]) \z/oxms) {
6769	0					0	$octet = '\\' . $1;
6770							}
6771							}
6772	0					0	$string = join '', @octet;
6773							}
6774	0					0	return join '', '(', (join ', ', map { "'$_'" } @string), ')';
	0					0
6775							}
6776
6777							#
6778							# escape here document (<<"HEREDOC", <
6779							#
6780							sub e_heredoc {
6781	93			93	0	190	my($string) = @_;
6782
6783	93					109	$slash = 'm//';
6784
6785	93					287	my $metachar = qr/[\@\\\|]/oxms; # '\|' is for <<`HEREDOC`
6786
6787	93					102	my $left_e = 0;
6788	93					85	my $right_e = 0;
6789
6790							# split regexp
6791	93					13123	my @char = $string =~ /\G((?>
6792							[^\x80-\xFF\\\$]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
6793							\\x\{ (?>[0-9A-Fa-f]+) \} \|
6794							\\o\{ (?>[0-7]+) \} \|
6795							\\N\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
6796							\\ $q_char \|
6797							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
6798							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
6799							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
6800							\$ (?>\s* [0-9]+) \|
6801							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
6802							\$ \$ (?![\w\{]) \|
6803							\$ (?>\s) \$ (?>\s) $qq_variable \|
6804							$q_char
6805							))/oxmsg;
6806
6807	93					644	for (my $i=0; $i <= $#char; $i++) {
6808
6809							# "\L\u" --> "\u\L"
6810	3010	50	66			10494	if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		50	33
		100
		100
		50
6811	0					0	@char[$i,$i+1] = @char[$i+1,$i];
6812							}
6813
6814							# "\U\l" --> "\l\U"
6815							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
6816	0					0	@char[$i,$i+1] = @char[$i+1,$i];
6817							}
6818
6819							# octal escape sequence
6820							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
6821	1					3	$char[$i] = Eutf2::octchr($1);
6822							}
6823
6824							# hexadecimal escape sequence
6825							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
6826	1					2	$char[$i] = Eutf2::hexchr($1);
6827							}
6828
6829							# \N{CHARNAME} --> N{CHARNAME}
6830							elsif ($char[$i] =~ /\A \\ ( N\{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
6831	0					0	$char[$i] = $1;
6832							}
6833
6834	3010	100				31495	if (0) {
		50
		50
		50
		100
		50
		50
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		100
		50
		50
6835							}
6836
6837							# \u \l \U \L \F \Q \E
6838	0					0	elsif ($char[$i] =~ /\A ([<>]) \z/oxms) {
6839	72	50				134	if ($right_e < $left_e) {
6840	0					0	$char[$i] = '\\' . $char[$i];
6841							}
6842							}
6843							elsif ($char[$i] eq '\u') {
6844	0					0	$char[$i] = '@{[Eutf2::ucfirst qq<';
6845	0					0	$left_e++;
6846							}
6847							elsif ($char[$i] eq '\l') {
6848	0					0	$char[$i] = '@{[Eutf2::lcfirst qq<';
6849	0					0	$left_e++;
6850							}
6851							elsif ($char[$i] eq '\U') {
6852	0					0	$char[$i] = '@{[Eutf2::uc qq<';
6853	0					0	$left_e++;
6854							}
6855							elsif ($char[$i] eq '\L') {
6856	6					5	$char[$i] = '@{[Eutf2::lc qq<';
6857	6					12	$left_e++;
6858							}
6859							elsif ($char[$i] eq '\F') {
6860	0					0	$char[$i] = '@{[Eutf2::fc qq<';
6861	0					0	$left_e++;
6862							}
6863							elsif ($char[$i] eq '\Q') {
6864	0					0	$char[$i] = '@{[CORE::quotemeta qq<';
6865	0					0	$left_e++;
6866							}
6867							elsif ($char[$i] eq '\E') {
6868	3	50				5	if ($right_e < $left_e) {
6869	3					4	$char[$i] = '>]}';
6870	3					4	$right_e++;
6871							}
6872							else {
6873	0					0	$char[$i] = '';
6874							}
6875							}
6876							elsif ($char[$i] eq '\Q') {
6877	0					0	while (1) {
6878	0	0				0	if (++$i > $#char) {
6879	0					0	last;
6880							}
6881	0	0				0	if ($char[$i] eq '\E') {
6882	0					0	last;
6883							}
6884							}
6885							}
6886							elsif ($char[$i] eq '\E') {
6887							}
6888
6889							# $0 --> $0
6890							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
6891							}
6892							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
6893							}
6894
6895							# $$ --> $$
6896							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
6897							}
6898
6899							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
6900							# $1, $2, $3 --> $1, $2, $3 otherwise
6901							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
6902	0					0	$char[$i] = e_capture($1);
6903							}
6904							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
6905	0					0	$char[$i] = e_capture($1);
6906							}
6907
6908							# $$foo[ ... ] --> $ $foo->[ ... ]
6909							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
6910	0					0	$char[$i] = e_capture($1.'->'.$2);
6911							}
6912
6913							# $$foo{ ... } --> $ $foo->{ ... }
6914							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
6915	0					0	$char[$i] = e_capture($1.'->'.$2);
6916							}
6917
6918							# $$foo
6919							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
6920	0					0	$char[$i] = e_capture($1);
6921							}
6922
6923							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
6924							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
6925	8					46	$char[$i] = '@{[Eutf2::PREMATCH()]}';
6926							}
6927
6928							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
6929							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
6930	8					52	$char[$i] = '@{[Eutf2::MATCH()]}';
6931							}
6932
6933							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
6934							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
6935	6					34	$char[$i] = '@{[Eutf2::POSTMATCH()]}';
6936							}
6937
6938							# ${ foo } --> ${ foo }
6939							elsif ($char[$i] =~ /\A \$ (?>\s) \{ (?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* ) \} \z/oxms) {
6940							}
6941
6942							# ${ ... }
6943							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
6944	0					0	$char[$i] = e_capture($1);
6945							}
6946							}
6947
6948							# return string
6949	93	100				187	if ($left_e > $right_e) {
6950	3					20	return join '', @char, '>]}' x ($left_e - $right_e);
6951							}
6952	90					656	return join '', @char;
6953							}
6954
6955							#
6956							# escape regexp (m//, qr//)
6957							#
6958							sub e_qr {
6959	1376			1376	0	2961	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
6960	1376		100			3674	$modifier \|\|= '';
6961
6962	1376					1921	$modifier =~ tr/p//d;
6963	1376	50				2980	if ($modifier =~ /([adlu])/oxms) {
6964	0					0	my $line = 0;
6965	0					0	for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
6966	0	0				0	if ($filename ne __FILE__) {
6967	0					0	$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
6968	0					0	last;
6969							}
6970							}
6971	0					0	die qq{Unsupported modifier "$1" used at line $line.\n};
6972							}
6973
6974	1376					1582	$slash = 'div';
6975
6976							# literal null string pattern
6977	1376	100				3369	if ($string eq '') {
		100
6978	8					7	$modifier =~ tr/bB//d;
6979	8					6	$modifier =~ tr/i//d;
6980	8					32	return join '', $ope, $delimiter, $end_delimiter, $modifier;
6981							}
6982
6983							# /b /B modifier
6984							elsif ($modifier =~ tr/bB//d) {
6985
6986							# choice again delimiter
6987	25	50				101	if ($delimiter =~ / [\@:] /oxms) {
6988	0					0	my @char = $string =~ /\G ([\x00-\xFF]) /oxmsg;
6989	0					0	my %octet = map {$_ => 1} @char;
	0					0
6990	0	0				0	if (not $octet{')'}) {
		0
		0
		0
6991	0					0	$delimiter = '(';
6992	0					0	$end_delimiter = ')';
6993							}
6994							elsif (not $octet{'}'}) {
6995	0					0	$delimiter = '{';
6996	0					0	$end_delimiter = '}';
6997							}
6998							elsif (not $octet{']'}) {
6999	0					0	$delimiter = '[';
7000	0					0	$end_delimiter = ']';
7001							}
7002							elsif (not $octet{'>'}) {
7003	0					0	$delimiter = '<';
7004	0					0	$end_delimiter = '>';
7005							}
7006							else {
7007	0					0	for my $char (qw( ! " $ % & * + - . / = ? ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
7008	0	0				0	if (not $octet{$char}) {
7009	0					0	$delimiter = $char;
7010	0					0	$end_delimiter = $char;
7011	0					0	last;
7012							}
7013							}
7014							}
7015							}
7016
7017	25	100	100			152	if (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
7018	4					28	return join '', $ope, $delimiter, $string, $matched, $end_delimiter, $modifier;
7019							}
7020							else {
7021	21					130	return join '', $ope, $delimiter, '(?:', $string, ')', $matched, $end_delimiter, $modifier;
7022							}
7023							}
7024
7025	1343	100				2487	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
7026	1343					4124	my $metachar = qr/[\@\\\|[\]{^]/oxms;
7027
7028							# split regexp
7029	1343					152497	my @char = $string =~ /\G((?>
7030							[^\x80-\xFF\\\$\@\[\(]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
7031							\\x (?>[0-9A-Fa-f]{1,2}) \|
7032							\\ (?>[0-7]{2,3}) \|
7033							\\c [\x40-\x5F] \|
7034							\\x\{ (?>[0-9A-Fa-f]+) \} \|
7035							\\o\{ (?>[0-7]+) \} \|
7036							\\[bBNpP]\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
7037							\\ $q_char \|
7038							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
7039							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
7040							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
7041							[\$\@] $qq_variable \|
7042							\$ (?>\s* [0-9]+) \|
7043							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
7044							\$ \$ (?![\w\{]) \|
7045							\$ (?>\s) \$ (?>\s) $qq_variable \|
7046							\[\^ \|
7047							\[\: (?>[a-z]+) :\] \|
7048							\[\:\^ (?>[a-z]+) :\] \|
7049							\(\? \|
7050							$q_char
7051							))/oxmsg;
7052
7053							# choice again delimiter
7054	1343	50				7358	if ($delimiter =~ / [\@:] /oxms) {
7055	0					0	my %octet = map {$_ => 1} @char;
	0					0
7056	0	0				0	if (not $octet{')'}) {
		0
		0
		0
7057	0					0	$delimiter = '(';
7058	0					0	$end_delimiter = ')';
7059							}
7060							elsif (not $octet{'}'}) {
7061	0					0	$delimiter = '{';
7062	0					0	$end_delimiter = '}';
7063							}
7064							elsif (not $octet{']'}) {
7065	0					0	$delimiter = '[';
7066	0					0	$end_delimiter = ']';
7067							}
7068							elsif (not $octet{'>'}) {
7069	0					0	$delimiter = '<';
7070	0					0	$end_delimiter = '>';
7071							}
7072							else {
7073	0					0	for my $char (qw( ! " $ % & * + - . / = ? ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
7074	0	0				0	if (not $octet{$char}) {
7075	0					0	$delimiter = $char;
7076	0					0	$end_delimiter = $char;
7077	0					0	last;
7078							}
7079							}
7080							}
7081							}
7082
7083	1343					1358	my $left_e = 0;
7084	1343					1266	my $right_e = 0;
7085	1343					3114	for (my $i=0; $i <= $#char; $i++) {
7086
7087							# "\L\u" --> "\u\L"
7088	3226	50	66			17611	if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		50	66
		100
		100
		100
		100
7089	0					0	@char[$i,$i+1] = @char[$i+1,$i];
7090							}
7091
7092							# "\U\l" --> "\l\U"
7093							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
7094	0					0	@char[$i,$i+1] = @char[$i+1,$i];
7095							}
7096
7097							# octal escape sequence
7098							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
7099	1					3	$char[$i] = Eutf2::octchr($1);
7100							}
7101
7102							# hexadecimal escape sequence
7103							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
7104	1					3	$char[$i] = Eutf2::hexchr($1);
7105							}
7106
7107							# \b{...} --> b\{...}
7108							# \B{...} --> B\{...}
7109							# \N{CHARNAME} --> N\{CHARNAME}
7110							# \p{PROPERTY} --> p\{PROPERTY}
7111							# \P{PROPERTY} --> P\{PROPERTY}
7112							elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
7113	6					15	$char[$i] = $1 . '\\' . $2;
7114							}
7115
7116							# \p, \P, \X --> p, P, X
7117							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
7118	4					9	$char[$i] = $1;
7119							}
7120
7121	3226	100	100			8726	if (0) {
		100	100
		100	100
		100
		100
		100
		50
		50
		100
		100
		100
		100
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		100
		50
		50
		100
		100
7122							}
7123
7124							# join separated multiple-octet
7125	0					0	elsif ($char[$i] =~ /\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms) {
7126	6	50	33			93	if ( ($i+3 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+3]) == 3) and (CORE::eval(sprintf '"%s%s%s%s"', @char[$i..$i+3]) =~ /\A $q_char \z/oxms)) {
		50	33
		50	33
			33
			66
			33
7127	0					0	$char[$i] .= join '', splice @char, $i+1, 3;
7128							}
7129							elsif (($i+2 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+2]) == 2) and (CORE::eval(sprintf '"%s%s%s"', @char[$i..$i+2]) =~ /\A $q_char \z/oxms)) {
7130	0					0	$char[$i] .= join '', splice @char, $i+1, 2;
7131							}
7132							elsif (($i+1 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, $char[$i+1 ]) == 1) and (CORE::eval(sprintf '"%s%s"', @char[$i..$i+1]) =~ /\A $q_char \z/oxms)) {
7133	0					0	$char[$i] .= join '', splice @char, $i+1, 1;
7134							}
7135							}
7136
7137							# open character class [...]
7138							elsif ($char[$i] eq '[') {
7139	598					591	my $left = $i;
7140
7141							# [] make die "Unmatched [] in regexp ...\n"
7142							# (and so on)
7143
7144	598	100				1338	if ($char[$i+1] eq ']') {
7145	3					6	$i++;
7146							}
7147
7148	598					489	while (1) {
7149	2607	50				3170	if (++$i > $#char) {
7150	0					0	die __FILE__, ": Unmatched [] in regexp\n";
7151							}
7152	2607	100				3526	if ($char[$i] eq ']') {
7153	598					508	my $right = $i;
7154
7155							# [...]
7156	598	100				3024	if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
7157	90					151	splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	270					291
7158							}
7159							else {
7160	508					1845	splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
7161							}
7162
7163	598					778	$i = $left;
7164	598					1534	last;
7165							}
7166							}
7167							}
7168
7169							# open character class [^...]
7170							elsif ($char[$i] eq '[^') {
7171	328					274	my $left = $i;
7172
7173							# [^] make die "Unmatched [] in regexp ...\n"
7174							# (and so on)
7175
7176	328	100				675	if ($char[$i+1] eq ']') {
7177	5					6	$i++;
7178							}
7179
7180	328					252	while (1) {
7181	1447	50				1684	if (++$i > $#char) {
7182	0					0	die __FILE__, ": Unmatched [] in regexp\n";
7183							}
7184	1447	100				1904	if ($char[$i] eq ']') {
7185	328					282	my $right = $i;
7186
7187							# [^...]
7188	328	100				1470	if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
7189	90					164	splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	270					316
7190							}
7191							else {
7192	238					797	splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
7193							}
7194
7195	328					440	$i = $left;
7196	328					831	last;
7197							}
7198							}
7199							}
7200
7201							# rewrite character class or escape character
7202							elsif (my $char = character_class($char[$i],$modifier)) {
7203	215					647	$char[$i] = $char;
7204							}
7205
7206							# /i modifier
7207							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
7208	44	50				61	if (CORE::length(Eutf2::fc($char[$i])) == 1) {
7209	44					59	$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
7210							}
7211							else {
7212	0					0	$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
7213							}
7214							}
7215
7216							# \u \l \U \L \F \Q \E
7217							elsif ($char[$i] =~ /\A [<>] \z/oxms) {
7218	1	50				4	if ($right_e < $left_e) {
7219	0					0	$char[$i] = '\\' . $char[$i];
7220							}
7221							}
7222							elsif ($char[$i] eq '\u') {
7223	0					0	$char[$i] = '@{[Eutf2::ucfirst qq<';
7224	0					0	$left_e++;
7225							}
7226							elsif ($char[$i] eq '\l') {
7227	0					0	$char[$i] = '@{[Eutf2::lcfirst qq<';
7228	0					0	$left_e++;
7229							}
7230							elsif ($char[$i] eq '\U') {
7231	1					2	$char[$i] = '@{[Eutf2::uc qq<';
7232	1					4	$left_e++;
7233							}
7234							elsif ($char[$i] eq '\L') {
7235	1					2	$char[$i] = '@{[Eutf2::lc qq<';
7236	1					4	$left_e++;
7237							}
7238							elsif ($char[$i] eq '\F') {
7239	16					17	$char[$i] = '@{[Eutf2::fc qq<';
7240	16					61	$left_e++;
7241							}
7242							elsif ($char[$i] eq '\Q') {
7243	20					20	$char[$i] = '@{[CORE::quotemeta qq<';
7244	20					62	$left_e++;
7245							}
7246							elsif ($char[$i] eq '\E') {
7247	38	50				51	if ($right_e < $left_e) {
7248	38					41	$char[$i] = '>]}';
7249	38					119	$right_e++;
7250							}
7251							else {
7252	0					0	$char[$i] = '';
7253							}
7254							}
7255							elsif ($char[$i] eq '\Q') {
7256	0					0	while (1) {
7257	0	0				0	if (++$i > $#char) {
7258	0					0	last;
7259							}
7260	0	0				0	if ($char[$i] eq '\E') {
7261	0					0	last;
7262							}
7263							}
7264							}
7265							elsif ($char[$i] eq '\E') {
7266							}
7267
7268							# $0 --> $0
7269							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
7270	0	0				0	if ($ignorecase) {
7271	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7272							}
7273							}
7274							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
7275	0	0				0	if ($ignorecase) {
7276	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7277							}
7278							}
7279
7280							# $$ --> $$
7281							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
7282							}
7283
7284							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
7285							# $1, $2, $3 --> $1, $2, $3 otherwise
7286							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
7287	0					0	$char[$i] = e_capture($1);
7288	0	0				0	if ($ignorecase) {
7289	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7290							}
7291							}
7292							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
7293	0					0	$char[$i] = e_capture($1);
7294	0	0				0	if ($ignorecase) {
7295	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7296							}
7297							}
7298
7299							# $$foo[ ... ] --> $ $foo->[ ... ]
7300							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
7301	0					0	$char[$i] = e_capture($1.'->'.$2);
7302	0	0				0	if ($ignorecase) {
7303	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7304							}
7305							}
7306
7307							# $$foo{ ... } --> $ $foo->{ ... }
7308							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
7309	0					0	$char[$i] = e_capture($1.'->'.$2);
7310	0	0				0	if ($ignorecase) {
7311	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7312							}
7313							}
7314
7315							# $$foo
7316							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
7317	0					0	$char[$i] = e_capture($1);
7318	0	0				0	if ($ignorecase) {
7319	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7320							}
7321							}
7322
7323							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
7324							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
7325	8	50				15	if ($ignorecase) {
7326	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::PREMATCH())]}';
7327							}
7328							else {
7329	8					41	$char[$i] = '@{[Eutf2::PREMATCH()]}';
7330							}
7331							}
7332
7333							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
7334							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
7335	8	50				16	if ($ignorecase) {
7336	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::MATCH())]}';
7337							}
7338							else {
7339	8					32	$char[$i] = '@{[Eutf2::MATCH()]}';
7340							}
7341							}
7342
7343							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
7344							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
7345	6	50				14	if ($ignorecase) {
7346	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::POSTMATCH())]}';
7347							}
7348							else {
7349	6					22	$char[$i] = '@{[Eutf2::POSTMATCH()]}';
7350							}
7351							}
7352
7353							# ${ foo }
7354							elsif ($char[$i] =~ /\A \$ (?>\s) \{ ((?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* )) \} \z/oxms) {
7355	0	0				0	if ($ignorecase) {
7356	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7357							}
7358							}
7359
7360							# ${ ... }
7361							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
7362	0					0	$char[$i] = e_capture($1);
7363	0	0				0	if ($ignorecase) {
7364	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7365							}
7366							}
7367
7368							# $scalar or @array
7369							elsif ($char[$i] =~ /\A [\$\@].+ /oxms) {
7370	42					79	$char[$i] = e_string($char[$i]);
7371	42	100				163	if ($ignorecase) {
7372	9					40	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7373							}
7374							}
7375
7376							# quote character before ? + * {
7377							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
7378	188	100	66			1354	if ($char[$i-1] =~ /\A (?:[\x00-\xFF]\|\\[0-7]{2,3}\|\\x[0-9-A-Fa-f]{1,2}) \z/oxms) {
		50
7379							}
7380							elsif (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
7381	0					0	my $char = $char[$i-1];
7382	0	0				0	if ($char[$i] eq '{') {
7383	0					0	die __FILE__, qq{: "MULTIBYTE{n}" should be "(MULTIBYTE){n}" in m?? (and shift \$1,\$2,\$3,...) ($char){n}\n};
7384							}
7385							else {
7386	0					0	die __FILE__, qq{: "MULTIBYTE$char[$i]" should be "(MULTIBYTE)$char[$i]" in m?? (and shift \$1,\$2,\$3,...) ($char)$char[$i]\n};
7387							}
7388							}
7389							else {
7390	187					917	$char[$i-1] = '(?:' . $char[$i-1] . ')';
7391							}
7392							}
7393							}
7394
7395							# make regexp string
7396	1343					1559	$modifier =~ tr/i//d;
7397	1343	50				2287	if ($left_e > $right_e) {
7398	0	0	0			0	if (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
7399	0					0	return join '', $ope, $delimiter, $anchor, @char, '>]}' x ($left_e - $right_e), $matched, $end_delimiter, $modifier;
7400							}
7401							else {
7402	0					0	return join '', $ope, $delimiter, $anchor, '(?:', @char, '>]}' x ($left_e - $right_e), ')', $matched, $end_delimiter, $modifier;
7403							}
7404							}
7405	1343	100	100			6740	if (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
7406	32					249	return join '', $ope, $delimiter, $anchor, @char, $matched, $end_delimiter, $modifier;
7407							}
7408							else {
7409	1311					9921	return join '', $ope, $delimiter, $anchor, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
7410							}
7411							}
7412
7413							#
7414							# double quote stuff
7415							#
7416							sub qq_stuff {
7417	540			540	0	476	my($delimiter,$end_delimiter,$stuff) = @_;
7418
7419							# scalar variable or array variable
7420	540	100				895	if ($stuff =~ /\A [\$\@] /oxms) {
7421	300					834	return $stuff;
7422							}
7423
7424							# quote by delimiter
7425	240					419	my %octet = map {$_ => 1} ($stuff =~ /\G ([\x00-\xFF]) /oxmsg);
	320					675
7426	240					452	for my $char (qw( ! " $ % & * + - . / : = ? @ ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
7427	240	50				342	next if $char eq $delimiter;
7428	240	50				300	next if $char eq $end_delimiter;
7429	240	50				357	if (not $octet{$char}) {
7430	240					810	return join '', 'qq', $char, $stuff, $char;
7431							}
7432							}
7433	0					0	return join '', 'qq', '<', $stuff, '>';
7434							}
7435
7436							#
7437							# escape regexp (m'', qr'', and m''b, qr''b)
7438							#
7439							sub e_qr_q {
7440	15			15	0	46	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7441	15		100			87	$modifier \|\|= '';
7442
7443	15					21	$modifier =~ tr/p//d;
7444	15	50				35	if ($modifier =~ /([adlu])/oxms) {
7445	0					0	my $line = 0;
7446	0					0	for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
7447	0	0				0	if ($filename ne __FILE__) {
7448	0					0	$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
7449	0					0	last;
7450							}
7451							}
7452	0					0	die qq{Unsupported modifier "$1" used at line $line.\n};
7453							}
7454
7455	15					17	$slash = 'div';
7456
7457							# literal null string pattern
7458	15	100				41	if ($string eq '') {
		100
7459	8					4	$modifier =~ tr/bB//d;
7460	8					6	$modifier =~ tr/i//d;
7461	8					42	return join '', $ope, $delimiter, $end_delimiter, $modifier;
7462							}
7463
7464							# with /b /B modifier
7465							elsif ($modifier =~ tr/bB//d) {
7466	3					9	return e_qr_qb($ope,$delimiter,$end_delimiter,$string,$modifier);
7467							}
7468
7469							# without /b /B modifier
7470							else {
7471	4					15	return e_qr_qt($ope,$delimiter,$end_delimiter,$string,$modifier);
7472							}
7473							}
7474
7475							#
7476							# escape regexp (m'', qr'')
7477							#
7478							sub e_qr_qt {
7479	4			4	0	9	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7480
7481	4	50				23	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
7482
7483							# split regexp
7484	4					395	my @char = $string =~ /\G((?>
7485							[^\x80-\xFF\\\[\$\@\/] \|
7486							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
7487							\[\^ \|
7488							\[\: (?>[a-z]+) \:\] \|
7489							\[\:\^ (?>[a-z]+) \:\] \|
7490							[\$\@\/] \|
7491							\\ (?:$q_char) \|
7492							(?:$q_char)
7493							))/oxmsg;
7494
7495							# unescape character
7496	4					32	for (my $i=0; $i <= $#char; $i++) {
7497	5	50	33			33	if (0) {
		50	33
		50	66
		50
		50
		50
7498							}
7499
7500							# open character class [...]
7501	0					0	elsif ($char[$i] eq '[') {
7502	0					0	my $left = $i;
7503	0	0				0	if ($char[$i+1] eq ']') {
7504	0					0	$i++;
7505							}
7506	0					0	while (1) {
7507	0	0				0	if (++$i > $#char) {
7508	0					0	die __FILE__, ": Unmatched [] in regexp\n";
7509							}
7510	0	0				0	if ($char[$i] eq ']') {
7511	0					0	my $right = $i;
7512
7513							# [...]
7514	0					0	splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
7515
7516	0					0	$i = $left;
7517	0					0	last;
7518							}
7519							}
7520							}
7521
7522							# open character class [^...]
7523							elsif ($char[$i] eq '[^') {
7524	0					0	my $left = $i;
7525	0	0				0	if ($char[$i+1] eq ']') {
7526	0					0	$i++;
7527							}
7528	0					0	while (1) {
7529	0	0				0	if (++$i > $#char) {
7530	0					0	die __FILE__, ": Unmatched [] in regexp\n";
7531							}
7532	0	0				0	if ($char[$i] eq ']') {
7533	0					0	my $right = $i;
7534
7535							# [^...]
7536	0					0	splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
7537
7538	0					0	$i = $left;
7539	0					0	last;
7540							}
7541							}
7542							}
7543
7544							# escape $ @ / and \
7545							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
7546	0					0	$char[$i] = '\\' . $char[$i];
7547							}
7548
7549							# rewrite character class or escape character
7550							elsif (my $char = character_class($char[$i],$modifier)) {
7551	0					0	$char[$i] = $char;
7552							}
7553
7554							# /i modifier
7555							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
7556	0	0				0	if (CORE::length(Eutf2::fc($char[$i])) == 1) {
7557	0					0	$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
7558							}
7559							else {
7560	0					0	$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
7561							}
7562							}
7563
7564							# quote character before ? + * {
7565							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
7566	0	0				0	if ($char[$i-1] =~ /\A [\x00-\xFF] \z/oxms) {
7567							}
7568							else {
7569	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
7570							}
7571							}
7572							}
7573
7574	4					7	$delimiter = '/';
7575	4					5	$end_delimiter = '/';
7576
7577	4					7	$modifier =~ tr/i//d;
7578	4					45	return join '', $ope, $delimiter, $anchor, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
7579							}
7580
7581							#
7582							# escape regexp (m''b, qr''b)
7583							#
7584							sub e_qr_qb {
7585	3			3	0	5	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7586
7587							# split regexp
7588	3					14	my @char = $string =~ /\G ((?>[^\\]\|\\\\\|[\x00-\xFF])) /oxmsg;
7589
7590							# unescape character
7591	3					10	for (my $i=0; $i <= $#char; $i++) {
7592	9	50				34	if (0) {
		50
7593							}
7594
7595							# remain \\
7596	0					0	elsif ($char[$i] eq '\\\\') {
7597							}
7598
7599							# escape $ @ / and \
7600							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
7601	0					0	$char[$i] = '\\' . $char[$i];
7602							}
7603							}
7604
7605	3					4	$delimiter = '/';
7606	3					5	$end_delimiter = '/';
7607	3					17	return join '', $ope, $delimiter, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
7608							}
7609
7610							#
7611							# escape regexp (s/here//)
7612							#
7613							sub e_s1 {
7614	110			110	0	191	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7615	110		100			357	$modifier \|\|= '';
7616
7617	110					130	$modifier =~ tr/p//d;
7618	110	50				271	if ($modifier =~ /([adlu])/oxms) {
7619	0					0	my $line = 0;
7620	0					0	for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
7621	0	0				0	if ($filename ne __FILE__) {
7622	0					0	$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
7623	0					0	last;
7624							}
7625							}
7626	0					0	die qq{Unsupported modifier "$1" used at line $line.\n};
7627							}
7628
7629	110					163	$slash = 'div';
7630
7631							# literal null string pattern
7632	110	100				365	if ($string eq '') {
		100
7633	8					6	$modifier =~ tr/bB//d;
7634	8					7	$modifier =~ tr/i//d;
7635	8					44	return join '', $ope, $delimiter, $end_delimiter, $modifier;
7636							}
7637
7638							# /b /B modifier
7639							elsif ($modifier =~ tr/bB//d) {
7640
7641							# choice again delimiter
7642	1	50				8	if ($delimiter =~ / [\@:] /oxms) {
7643	0					0	my @char = $string =~ /\G ([\x00-\xFF]) /oxmsg;
7644	0					0	my %octet = map {$_ => 1} @char;
	0					0
7645	0	0				0	if (not $octet{')'}) {
		0
		0
		0
7646	0					0	$delimiter = '(';
7647	0					0	$end_delimiter = ')';
7648							}
7649							elsif (not $octet{'}'}) {
7650	0					0	$delimiter = '{';
7651	0					0	$end_delimiter = '}';
7652							}
7653							elsif (not $octet{']'}) {
7654	0					0	$delimiter = '[';
7655	0					0	$end_delimiter = ']';
7656							}
7657							elsif (not $octet{'>'}) {
7658	0					0	$delimiter = '<';
7659	0					0	$end_delimiter = '>';
7660							}
7661							else {
7662	0					0	for my $char (qw( ! " $ % & * + - . / = ? ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
7663	0	0				0	if (not $octet{$char}) {
7664	0					0	$delimiter = $char;
7665	0					0	$end_delimiter = $char;
7666	0					0	last;
7667							}
7668							}
7669							}
7670							}
7671
7672	1					3	my $prematch = '';
7673	1					12	return join '', $ope, $delimiter, $prematch, '(?:', $string, ')', $matched, $end_delimiter, $modifier;
7674							}
7675
7676	101	100				235	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
7677	101					373	my $metachar = qr/[\@\\\|[\]{^]/oxms;
7678
7679							# split regexp
7680	101					52534	my @char = $string =~ /\G((?>
7681							[^\x80-\xFF\\\$\@\[\(]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
7682							\\ (?>[1-9][0-9]*) \|
7683							\\g (?>\s) (?>[1-9][0-9]) \|
7684							\\g (?>\s) \{ (?>\s) (?>[1-9][0-9]) (?>\s) \} \|
7685							\\g (?>\s) \{ (?>\s) - (?>\s) (?>[1-9][0-9]) (?>\s*) \} \|
7686							\\x (?>[0-9A-Fa-f]{1,2}) \|
7687							\\ (?>[0-7]{2,3}) \|
7688							\\c [\x40-\x5F] \|
7689							\\x\{ (?>[0-9A-Fa-f]+) \} \|
7690							\\o\{ (?>[0-7]+) \} \|
7691							\\[bBNpP]\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
7692							\\ $q_char \|
7693							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
7694							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
7695							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
7696							[\$\@] $qq_variable \|
7697							\$ (?>\s* [0-9]+) \|
7698							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
7699							\$ \$ (?![\w\{]) \|
7700							\$ (?>\s) \$ (?>\s) $qq_variable \|
7701							\[\^ \|
7702							\[\: (?>[a-z]+) :\] \|
7703							\[\:\^ (?>[a-z]+) :\] \|
7704							\(\? \|
7705							$q_char
7706							))/oxmsg;
7707
7708							# choice again delimiter
7709	101	50				1980	if ($delimiter =~ / [\@:] /oxms) {
7710	0					0	my %octet = map {$_ => 1} @char;
	0					0
7711	0	0				0	if (not $octet{')'}) {
		0
		0
		0
7712	0					0	$delimiter = '(';
7713	0					0	$end_delimiter = ')';
7714							}
7715							elsif (not $octet{'}'}) {
7716	0					0	$delimiter = '{';
7717	0					0	$end_delimiter = '}';
7718							}
7719							elsif (not $octet{']'}) {
7720	0					0	$delimiter = '[';
7721	0					0	$end_delimiter = ']';
7722							}
7723							elsif (not $octet{'>'}) {
7724	0					0	$delimiter = '<';
7725	0					0	$end_delimiter = '>';
7726							}
7727							else {
7728	0					0	for my $char (qw( ! " $ % & * + - . / = ? ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
7729	0	0				0	if (not $octet{$char}) {
7730	0					0	$delimiter = $char;
7731	0					0	$end_delimiter = $char;
7732	0					0	last;
7733							}
7734							}
7735							}
7736							}
7737
7738							# count '('
7739	101					158	my $parens = grep { $_ eq '(' } @char;
	425					593
7740
7741	101					157	my $left_e = 0;
7742	101					149	my $right_e = 0;
7743	101					409	for (my $i=0; $i <= $#char; $i++) {
7744
7745							# "\L\u" --> "\u\L"
7746	346	50	33			2337	if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		50	33
		100
		100
		50
		50
7747	0					0	@char[$i,$i+1] = @char[$i+1,$i];
7748							}
7749
7750							# "\U\l" --> "\l\U"
7751							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
7752	0					0	@char[$i,$i+1] = @char[$i+1,$i];
7753							}
7754
7755							# octal escape sequence
7756							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
7757	1					2	$char[$i] = Eutf2::octchr($1);
7758							}
7759
7760							# hexadecimal escape sequence
7761							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
7762	1					2	$char[$i] = Eutf2::hexchr($1);
7763							}
7764
7765							# \b{...} --> b\{...}
7766							# \B{...} --> B\{...}
7767							# \N{CHARNAME} --> N\{CHARNAME}
7768							# \p{PROPERTY} --> p\{PROPERTY}
7769							# \P{PROPERTY} --> P\{PROPERTY}
7770							elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
7771	0					0	$char[$i] = $1 . '\\' . $2;
7772							}
7773
7774							# \p, \P, \X --> p, P, X
7775							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
7776	0					0	$char[$i] = $1;
7777							}
7778
7779	346	50	66			1266	if (0) {
		100	66
		50	100
		100
		100
		100
		50
		50
		50
		50
		50
		100
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		100
		50
		50
		100
		100
7780							}
7781
7782							# join separated multiple-octet
7783	0					0	elsif ($char[$i] =~ /\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms) {
7784	0	0	0			0	if ( ($i+3 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+3]) == 3) and (CORE::eval(sprintf '"%s%s%s%s"', @char[$i..$i+3]) =~ /\A $q_char \z/oxms)) {
		0	0
		0	0
			0
			0
			0
7785	0					0	$char[$i] .= join '', splice @char, $i+1, 3;
7786							}
7787							elsif (($i+2 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+2]) == 2) and (CORE::eval(sprintf '"%s%s%s"', @char[$i..$i+2]) =~ /\A $q_char \z/oxms)) {
7788	0					0	$char[$i] .= join '', splice @char, $i+1, 2;
7789							}
7790							elsif (($i+1 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, $char[$i+1 ]) == 1) and (CORE::eval(sprintf '"%s%s"', @char[$i..$i+1]) =~ /\A $q_char \z/oxms)) {
7791	0					0	$char[$i] .= join '', splice @char, $i+1, 1;
7792							}
7793							}
7794
7795							# open character class [...]
7796							elsif ($char[$i] eq '[') {
7797	20					25	my $left = $i;
7798	20	50				81	if ($char[$i+1] eq ']') {
7799	0					0	$i++;
7800							}
7801	20					20	while (1) {
7802	79	50				112	if (++$i > $#char) {
7803	0					0	die __FILE__, ": Unmatched [] in regexp\n";
7804							}
7805	79	100				111	if ($char[$i] eq ']') {
7806	20					19	my $right = $i;
7807
7808							# [...]
7809	20	50				116	if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
7810	0					0	splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0					0
7811							}
7812							else {
7813	20					125	splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
7814							}
7815
7816	20					28	$i = $left;
7817	20					55	last;
7818							}
7819							}
7820							}
7821
7822							# open character class [^...]
7823							elsif ($char[$i] eq '[^') {
7824	0					0	my $left = $i;
7825	0	0				0	if ($char[$i+1] eq ']') {
7826	0					0	$i++;
7827							}
7828	0					0	while (1) {
7829	0	0				0	if (++$i > $#char) {
7830	0					0	die __FILE__, ": Unmatched [] in regexp\n";
7831							}
7832	0	0				0	if ($char[$i] eq ']') {
7833	0					0	my $right = $i;
7834
7835							# [^...]
7836	0	0				0	if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
7837	0					0	splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0					0
7838							}
7839							else {
7840	0					0	splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
7841							}
7842
7843	0					0	$i = $left;
7844	0					0	last;
7845							}
7846							}
7847							}
7848
7849							# rewrite character class or escape character
7850							elsif (my $char = character_class($char[$i],$modifier)) {
7851	11					30	$char[$i] = $char;
7852							}
7853
7854							# /i modifier
7855							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
7856	3	50				5	if (CORE::length(Eutf2::fc($char[$i])) == 1) {
7857	3					5	$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
7858							}
7859							else {
7860	0					0	$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
7861							}
7862							}
7863
7864							# \u \l \U \L \F \Q \E
7865							elsif ($char[$i] =~ /\A [<>] \z/oxms) {
7866	8	50				33	if ($right_e < $left_e) {
7867	0					0	$char[$i] = '\\' . $char[$i];
7868							}
7869							}
7870							elsif ($char[$i] eq '\u') {
7871	0					0	$char[$i] = '@{[Eutf2::ucfirst qq<';
7872	0					0	$left_e++;
7873							}
7874							elsif ($char[$i] eq '\l') {
7875	0					0	$char[$i] = '@{[Eutf2::lcfirst qq<';
7876	0					0	$left_e++;
7877							}
7878							elsif ($char[$i] eq '\U') {
7879	0					0	$char[$i] = '@{[Eutf2::uc qq<';
7880	0					0	$left_e++;
7881							}
7882							elsif ($char[$i] eq '\L') {
7883	0					0	$char[$i] = '@{[Eutf2::lc qq<';
7884	0					0	$left_e++;
7885							}
7886							elsif ($char[$i] eq '\F') {
7887	0					0	$char[$i] = '@{[Eutf2::fc qq<';
7888	0					0	$left_e++;
7889							}
7890							elsif ($char[$i] eq '\Q') {
7891	5					7	$char[$i] = '@{[CORE::quotemeta qq<';
7892	5					13	$left_e++;
7893							}
7894							elsif ($char[$i] eq '\E') {
7895	5	50				8	if ($right_e < $left_e) {
7896	5					3	$char[$i] = '>]}';
7897	5					15	$right_e++;
7898							}
7899							else {
7900	0					0	$char[$i] = '';
7901							}
7902							}
7903							elsif ($char[$i] eq '\Q') {
7904	0					0	while (1) {
7905	0	0				0	if (++$i > $#char) {
7906	0					0	last;
7907							}
7908	0	0				0	if ($char[$i] eq '\E') {
7909	0					0	last;
7910							}
7911							}
7912							}
7913							elsif ($char[$i] eq '\E') {
7914							}
7915
7916							# \0 --> \0
7917							elsif ($char[$i] =~ /\A \\ (?>\s*) 0 \z/oxms) {
7918							}
7919
7920							# \g{N}, \g{-N}
7921
7922							# P.108 Using Simple Patterns
7923							# in Chapter 7: In the World of Regular Expressions
7924							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
7925
7926							# P.221 Capturing
7927							# in Chapter 5: Pattern Matching
7928							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
7929
7930							# \g{-1}, \g{-2}, \g{-3} --> \g{-1}, \g{-2}, \g{-3}
7931							elsif ($char[$i] =~ /\A \\g (?>\s) \{ (?>\s) - (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
7932							}
7933
7934							# \g{1}, \g{2}, \g{3} --> \g{2}, \g{3}, \g{4} (only when multibyte anchoring is enable)
7935							elsif ($char[$i] =~ /\A \\g (?>\s) \{ (?>\s) ((?>[1-9][0-9])) (?>\s) \} \z/oxms) {
7936							}
7937
7938							# \g1, \g2, \g3 --> \g2, \g3, \g4 (only when multibyte anchoring is enable)
7939							elsif ($char[$i] =~ /\A \\g (?>\s) ((?>[1-9][0-9])) \z/oxms) {
7940							}
7941
7942							# \1, \2, \3 --> \2, \3, \4 (only when multibyte anchoring is enable)
7943							elsif ($char[$i] =~ /\A \\ (?>\s) ((?>[1-9][0-9])) \z/oxms) {
7944							}
7945
7946							# $0 --> $0
7947							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
7948	0	0				0	if ($ignorecase) {
7949	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7950							}
7951							}
7952							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
7953	0	0				0	if ($ignorecase) {
7954	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7955							}
7956							}
7957
7958							# $$ --> $$
7959							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
7960							}
7961
7962							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
7963							# $1, $2, $3 --> $1, $2, $3 otherwise
7964							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
7965	0					0	$char[$i] = e_capture($1);
7966	0	0				0	if ($ignorecase) {
7967	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7968							}
7969							}
7970							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
7971	0					0	$char[$i] = e_capture($1);
7972	0	0				0	if ($ignorecase) {
7973	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7974							}
7975							}
7976
7977							# $$foo[ ... ] --> $ $foo->[ ... ]
7978							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
7979	0					0	$char[$i] = e_capture($1.'->'.$2);
7980	0	0				0	if ($ignorecase) {
7981	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7982							}
7983							}
7984
7985							# $$foo{ ... } --> $ $foo->{ ... }
7986							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
7987	0					0	$char[$i] = e_capture($1.'->'.$2);
7988	0	0				0	if ($ignorecase) {
7989	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7990							}
7991							}
7992
7993							# $$foo
7994							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
7995	0					0	$char[$i] = e_capture($1);
7996	0	0				0	if ($ignorecase) {
7997	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7998							}
7999							}
8000
8001							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
8002							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
8003	4	50				11	if ($ignorecase) {
8004	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::PREMATCH())]}';
8005							}
8006							else {
8007	4					20	$char[$i] = '@{[Eutf2::PREMATCH()]}';
8008							}
8009							}
8010
8011							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
8012							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
8013	4	50				10	if ($ignorecase) {
8014	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::MATCH())]}';
8015							}
8016							else {
8017	4					22	$char[$i] = '@{[Eutf2::MATCH()]}';
8018							}
8019							}
8020
8021							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
8022							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
8023	3	50				9	if ($ignorecase) {
8024	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::POSTMATCH())]}';
8025							}
8026							else {
8027	3					14	$char[$i] = '@{[Eutf2::POSTMATCH()]}';
8028							}
8029							}
8030
8031							# ${ foo }
8032							elsif ($char[$i] =~ /\A \$ (?>\s) \{ ((?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* )) \} \z/oxms) {
8033	0	0				0	if ($ignorecase) {
8034	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8035							}
8036							}
8037
8038							# ${ ... }
8039							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
8040	0					0	$char[$i] = e_capture($1);
8041	0	0				0	if ($ignorecase) {
8042	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8043							}
8044							}
8045
8046							# $scalar or @array
8047							elsif ($char[$i] =~ /\A [\$\@].+ /oxms) {
8048	9					19	$char[$i] = e_string($char[$i]);
8049	9	50				70	if ($ignorecase) {
8050	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8051							}
8052							}
8053
8054							# quote character before ? + * {
8055							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
8056	23	50				104	if ($char[$i-1] =~ /\A (?:[\x00-\xFF]\|\\[0-7]{2,3}\|\\x[0-9-A-Fa-f]{1,2}) \z/oxms) {
8057							}
8058							else {
8059	23					164	$char[$i-1] = '(?:' . $char[$i-1] . ')';
8060							}
8061							}
8062							}
8063
8064							# make regexp string
8065	101					169	my $prematch = '';
8066	101					137	$modifier =~ tr/i//d;
8067	101	50				401	if ($left_e > $right_e) {
8068	0					0	return join '', $ope, $delimiter, $prematch, '(?:', @char, '>]}' x ($left_e - $right_e), ')', $matched, $end_delimiter, $modifier;
8069							}
8070	101					1143	return join '', $ope, $delimiter, $prematch, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
8071							}
8072
8073							#
8074							# escape regexp (s'here'' or s'here''b)
8075							#
8076							sub e_s1_q {
8077	22			22	0	32	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8078	22		100			66	$modifier \|\|= '';
8079
8080	22					22	$modifier =~ tr/p//d;
8081	22	50				47	if ($modifier =~ /([adlu])/oxms) {
8082	0					0	my $line = 0;
8083	0					0	for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
8084	0	0				0	if ($filename ne __FILE__) {
8085	0					0	$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
8086	0					0	last;
8087							}
8088							}
8089	0					0	die qq{Unsupported modifier "$1" used at line $line.\n};
8090							}
8091
8092	22					24	$slash = 'div';
8093
8094							# literal null string pattern
8095	22	100				58	if ($string eq '') {
		100
8096	8					6	$modifier =~ tr/bB//d;
8097	8					8	$modifier =~ tr/i//d;
8098	8					47	return join '', $ope, $delimiter, $end_delimiter, $modifier;
8099							}
8100
8101							# with /b /B modifier
8102							elsif ($modifier =~ tr/bB//d) {
8103	1					3	return e_s1_qb($ope,$delimiter,$end_delimiter,$string,$modifier);
8104							}
8105
8106							# without /b /B modifier
8107							else {
8108	13					25	return e_s1_qt($ope,$delimiter,$end_delimiter,$string,$modifier);
8109							}
8110							}
8111
8112							#
8113							# escape regexp (s'here'')
8114							#
8115							sub e_s1_qt {
8116	13			13	0	20	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8117
8118	13	50				24	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
8119
8120							# split regexp
8121	13					538	my @char = $string =~ /\G((?>
8122							[^\x80-\xFF\\\[\$\@\/] \|
8123							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
8124							\[\^ \|
8125							\[\: (?>[a-z]+) \:\] \|
8126							\[\:\^ (?>[a-z]+) \:\] \|
8127							[\$\@\/] \|
8128							\\ (?:$q_char) \|
8129							(?:$q_char)
8130							))/oxmsg;
8131
8132							# unescape character
8133	13					58	for (my $i=0; $i <= $#char; $i++) {
8134	25	50	33			123	if (0) {
		50	33
		50	66
		100
		50
		50
8135							}
8136
8137							# open character class [...]
8138	0					0	elsif ($char[$i] eq '[') {
8139	0					0	my $left = $i;
8140	0	0				0	if ($char[$i+1] eq ']') {
8141	0					0	$i++;
8142							}
8143	0					0	while (1) {
8144	0	0				0	if (++$i > $#char) {
8145	0					0	die __FILE__, ": Unmatched [] in regexp\n";
8146							}
8147	0	0				0	if ($char[$i] eq ']') {
8148	0					0	my $right = $i;
8149
8150							# [...]
8151	0					0	splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
8152
8153	0					0	$i = $left;
8154	0					0	last;
8155							}
8156							}
8157							}
8158
8159							# open character class [^...]
8160							elsif ($char[$i] eq '[^') {
8161	0					0	my $left = $i;
8162	0	0				0	if ($char[$i+1] eq ']') {
8163	0					0	$i++;
8164							}
8165	0					0	while (1) {
8166	0	0				0	if (++$i > $#char) {
8167	0					0	die __FILE__, ": Unmatched [] in regexp\n";
8168							}
8169	0	0				0	if ($char[$i] eq ']') {
8170	0					0	my $right = $i;
8171
8172							# [^...]
8173	0					0	splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
8174
8175	0					0	$i = $left;
8176	0					0	last;
8177							}
8178							}
8179							}
8180
8181							# escape $ @ / and \
8182							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
8183	0					0	$char[$i] = '\\' . $char[$i];
8184							}
8185
8186							# rewrite character class or escape character
8187							elsif (my $char = character_class($char[$i],$modifier)) {
8188	6					13	$char[$i] = $char;
8189							}
8190
8191							# /i modifier
8192							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
8193	0	0				0	if (CORE::length(Eutf2::fc($char[$i])) == 1) {
8194	0					0	$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
8195							}
8196							else {
8197	0					0	$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
8198							}
8199							}
8200
8201							# quote character before ? + * {
8202							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
8203	0	0				0	if ($char[$i-1] =~ /\A [\x00-\xFF] \z/oxms) {
8204							}
8205							else {
8206	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
8207							}
8208							}
8209							}
8210
8211	13					16	$modifier =~ tr/i//d;
8212	13					14	$delimiter = '/';
8213	13					14	$end_delimiter = '/';
8214	13					13	my $prematch = '';
8215	13					94	return join '', $ope, $delimiter, $prematch, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
8216							}
8217
8218							#
8219							# escape regexp (s'here''b)
8220							#
8221							sub e_s1_qb {
8222	1			1	0	2	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8223
8224							# split regexp
8225	1					5	my @char = $string =~ /\G (?>[^\\]\|\\\\\|[\x00-\xFF]) /oxmsg;
8226
8227							# unescape character
8228	1					30	for (my $i=0; $i <= $#char; $i++) {
8229	3	50				11	if (0) {
		50
8230							}
8231
8232							# remain \\
8233	0					0	elsif ($char[$i] eq '\\\\') {
8234							}
8235
8236							# escape $ @ / and \
8237							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
8238	0					0	$char[$i] = '\\' . $char[$i];
8239							}
8240							}
8241
8242	1					2	$delimiter = '/';
8243	1					2	$end_delimiter = '/';
8244	1					1	my $prematch = '';
8245	1					8	return join '', $ope, $delimiter, $prematch, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
8246							}
8247
8248							#
8249							# escape regexp (s''here')
8250							#
8251							sub e_s2_q {
8252	17			17	0	23	my($ope,$delimiter,$end_delimiter,$string) = @_;
8253
8254	17					15	$slash = 'div';
8255
8256	17					292	my @char = $string =~ / \G (?>[^\x80-\xFF\\]\|\\\\\|$q_char) /oxmsg;
8257	17					52	for (my $i=0; $i <= $#char; $i++) {
8258	9	100				31	if (0) {
		100
8259							}
8260
8261							# not escape \\
8262	0					0	elsif ($char[$i] =~ /\A \\\\ \z/oxms) {
8263							}
8264
8265							# escape $ @ / and \
8266							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
8267	5					14	$char[$i] = '\\' . $char[$i];
8268							}
8269							}
8270
8271	17					50	return join '', $ope, $delimiter, @char, $end_delimiter;
8272							}
8273
8274							#
8275							# escape regexp (s/here/and here/modifier)
8276							#
8277							sub e_sub {
8278	132			132	0	546	my($variable,$delimiter1,$pattern,$end_delimiter1,$delimiter2,$replacement,$end_delimiter2,$modifier) = @_;
8279	132		100			443	$modifier \|\|= '';
8280
8281	132					204	$modifier =~ tr/p//d;
8282	132	50				337	if ($modifier =~ /([adlu])/oxms) {
8283	0					0	my $line = 0;
8284	0					0	for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
8285	0	0				0	if ($filename ne __FILE__) {
8286	0					0	$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
8287	0					0	last;
8288							}
8289							}
8290	0					0	die qq{Unsupported modifier "$1" used at line $line.\n};
8291							}
8292
8293	132	100				295	if ($variable eq '') {
8294	37					35	$variable = '$_';
8295	37					39	$bind_operator = ' =~ ';
8296							}
8297
8298	132					156	$slash = 'div';
8299
8300							# P.128 Start of match (or end of previous match): \G
8301							# P.130 Advanced Use of \G with Perl
8302							# in Chapter 3: Overview of Regular Expression Features and Flavors
8303							# P.312 Iterative Matching: Scalar Context, with /g
8304							# in Chapter 7: Perl
8305							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
8306
8307							# P.181 Where You Left Off: The \G Assertion
8308							# in Chapter 5: Pattern Matching
8309							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
8310
8311							# P.220 Where You Left Off: The \G Assertion
8312							# in Chapter 5: Pattern Matching
8313							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
8314
8315	132					156	my $e_modifier = $modifier =~ tr/e//d;
8316	132					145	my $r_modifier = $modifier =~ tr/r//d;
8317
8318	132					157	my $my = '';
8319	132	50				299	if ($variable =~ s/\A $ (?>\s*) ( (?>(?: local \b \| my \b \| our \b \| state \b )?) .+ ) $ \z/$1/oxms) {
8320	0					0	$my = $variable;
8321	0					0	$variable =~ s/ (?: local \b \| my \b \| our \b \| state \b ) (?>\s*) //oxms;
8322	0					0	$variable =~ s/ = .+ \z//oxms;
8323							}
8324
8325	132					263	(my $variable_basename = $variable) =~ s/ [\[\{].* \z//oxms;
8326	132					189	$variable_basename =~ s/ \s+ \z//oxms;
8327
8328							# quote replacement string
8329	132					145	my $e_replacement = '';
8330	132	100				247	if ($e_modifier >= 1) {
8331	17					29	$e_replacement = e_qq('', '', '', $replacement);
8332	17					19	$e_modifier--;
8333							}
8334							else {
8335	115	100				206	if ($delimiter2 eq "'") {
8336	17					32	$e_replacement = e_s2_q('qq', '/', '/', $replacement);
8337							}
8338							else {
8339	98					203	$e_replacement = e_qq ('qq', $delimiter2, $end_delimiter2, $replacement);
8340							}
8341							}
8342
8343	132					189	my $sub = '';
8344
8345							# with /r
8346	132	100				268	if ($r_modifier) {
8347	8	100				13	if (0) {
8348							}
8349
8350							# s///gr without multibyte anchoring
8351	0					0	elsif ($modifier =~ /g/oxms) {
8352	4	50				17	$sub = sprintf(
8353							# 1 2 3 4 5
8354							q,
8355
8356							$variable, # 1
8357							($delimiter1 eq "'") ? # 2
8358							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8359							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8360							$s_matched, # 3
8361							$e_replacement, # 4
8362							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 5
8363							);
8364							}
8365
8366							# s///r
8367							else {
8368
8369	4					4	my $prematch = q{$`};
8370
8371	4	50				11	$sub = sprintf(
8372							# 1 2 3 4 5 6 7
8373							q<(%s =~ %s) ? CORE::eval{%s local $^W=0; local $UTF2::re_r=%s; %s"%s$UTF2::re_r$'" } : %s>,
8374
8375							$variable, # 1
8376							($delimiter1 eq "'") ? # 2
8377							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8378							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8379							$s_matched, # 3
8380							$e_replacement, # 4
8381							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 5
8382							$prematch, # 6
8383							$variable, # 7
8384							);
8385							}
8386
8387							# $var !~ s///r doesn't make sense
8388	8	50				20	if ($bind_operator =~ / !~ /oxms) {
8389	0					0	$sub = q{die("$0: Using !~ with s///r doesn't make sense"), } . $sub;
8390							}
8391							}
8392
8393							# without /r
8394							else {
8395	124	100				248	if (0) {
8396							}
8397
8398							# s///g without multibyte anchoring
8399	0					0	elsif ($modifier =~ /g/oxms) {
8400	29	100				103	$sub = sprintf(
		100
8401							# 1 2 3 4 5 6 7 8
8402							q,
8403
8404							$variable, # 1
8405							($delimiter1 eq "'") ? # 2
8406							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8407							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8408							$s_matched, # 3
8409							$e_replacement, # 4
8410							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 5
8411							$variable, # 6
8412							$variable, # 7
8413							($bind_operator =~ / !~ /oxms) ? '!' : '', # 8
8414							);
8415							}
8416
8417							# s///
8418							else {
8419
8420	95					110	my $prematch = q{$`};
8421
8422	95	100				492	$sub = sprintf(
		100
8423
8424							($bind_operator =~ / =~ /oxms) ?
8425
8426							# 1 2 3 4 5 6 7 8
8427							q<(%s%s%s) ? CORE::eval{%s local $^W=0; local $UTF2::re_r=%s; %s%s="%s$UTF2::re_r$'"; 1 } : undef> :
8428
8429							# 1 2 3 4 5 6 7 8
8430							q<(%s%s%s) ? 1 : CORE::eval{%s local $^W=0; local $UTF2::re_r=%s; %s%s="%s$UTF2::re_r$'"; undef }>,
8431
8432							$variable, # 1
8433							$bind_operator, # 2
8434							($delimiter1 eq "'") ? # 3
8435							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8436							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8437							$s_matched, # 4
8438							$e_replacement, # 5
8439							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 6
8440							$variable, # 7
8441							$prematch, # 8
8442							);
8443							}
8444							}
8445
8446							# (my $foo = $bar) =~ s/// --> (my $foo = $bar, CORE::eval { ... })[1]
8447	132	50				319	if ($my ne '') {
8448	0					0	$sub = "($my, $sub)[1]";
8449							}
8450
8451							# clear s/// variable
8452	132					163	$sub_variable = '';
8453	132					132	$bind_operator = '';
8454
8455	132					1974	return $sub;
8456							}
8457
8458							#
8459							# escape regexp of split qr//
8460							#
8461							sub e_split {
8462	101			101	0	263	my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8463	101		100			333	$modifier \|\|= '';
8464
8465	101					138	$modifier =~ tr/p//d;
8466	101	50				215	if ($modifier =~ /([adlu])/oxms) {
8467	0					0	my $line = 0;
8468	0					0	for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
8469	0	0				0	if ($filename ne __FILE__) {
8470	0					0	$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
8471	0					0	last;
8472							}
8473							}
8474	0					0	die qq{Unsupported modifier "$1" used at line $line.\n};
8475							}
8476
8477	101					113	$slash = 'div';
8478
8479							# /b /B modifier
8480	101	50				198	if ($modifier =~ tr/bB//d) {
8481	0					0	return join '', 'split', $ope, $delimiter, $string, $end_delimiter, $modifier;
8482							}
8483
8484	101	50				190	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
8485	101					323	my $metachar = qr/[\@\\\|[\]{^]/oxms;
8486
8487							# split regexp
8488	101					20781	my @char = $string =~ /\G((?>
8489							[^\x80-\xFF\\\$\@\[\(]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
8490							\\x (?>[0-9A-Fa-f]{1,2}) \|
8491							\\ (?>[0-7]{2,3}) \|
8492							\\c [\x40-\x5F] \|
8493							\\x\{ (?>[0-9A-Fa-f]+) \} \|
8494							\\o\{ (?>[0-7]+) \} \|
8495							\\[bBNpP]\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
8496							\\ $q_char \|
8497							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
8498							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
8499							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
8500							[\$\@] $qq_variable \|
8501							\$ (?>\s* [0-9]+) \|
8502							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
8503							\$ \$ (?![\w\{]) \|
8504							\$ (?>\s) \$ (?>\s) $qq_variable \|
8505							\[\^ \|
8506							\[\: (?>[a-z]+) :\] \|
8507							\[\:\^ (?>[a-z]+) :\] \|
8508							\(\? \|
8509							$q_char
8510							))/oxmsg;
8511
8512	101					767	my $left_e = 0;
8513	101					106	my $right_e = 0;
8514	101					276	for (my $i=0; $i <= $#char; $i++) {
8515
8516							# "\L\u" --> "\u\L"
8517	284	50	33			1652	if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		50	33
		100
		100
		50
		50
8518	0					0	@char[$i,$i+1] = @char[$i+1,$i];
8519							}
8520
8521							# "\U\l" --> "\l\U"
8522							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
8523	0					0	@char[$i,$i+1] = @char[$i+1,$i];
8524							}
8525
8526							# octal escape sequence
8527							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
8528	1					2	$char[$i] = Eutf2::octchr($1);
8529							}
8530
8531							# hexadecimal escape sequence
8532							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
8533	1					65	$char[$i] = Eutf2::hexchr($1);
8534							}
8535
8536							# \b{...} --> b\{...}
8537							# \B{...} --> B\{...}
8538							# \N{CHARNAME} --> N\{CHARNAME}
8539							# \p{PROPERTY} --> p\{PROPERTY}
8540							# \P{PROPERTY} --> P\{PROPERTY}
8541							elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
8542	0					0	$char[$i] = $1 . '\\' . $2;
8543							}
8544
8545							# \p, \P, \X --> p, P, X
8546							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
8547	0					0	$char[$i] = $1;
8548							}
8549
8550	284	50	100			866	if (0) {
		100	33
		100	33
		100	100
		100
		50
		100
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		50
		100
		100
		100
		50
		50
		100
		100
8551							}
8552
8553							# join separated multiple-octet
8554	0					0	elsif ($char[$i] =~ /\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms) {
8555	0	0	0			0	if ( ($i+3 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+3]) == 3) and (CORE::eval(sprintf '"%s%s%s%s"', @char[$i..$i+3]) =~ /\A $q_char \z/oxms)) {
		0	0
		0	0
			0
			0
			0
8556	0					0	$char[$i] .= join '', splice @char, $i+1, 3;
8557							}
8558							elsif (($i+2 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+2]) == 2) and (CORE::eval(sprintf '"%s%s%s"', @char[$i..$i+2]) =~ /\A $q_char \z/oxms)) {
8559	0					0	$char[$i] .= join '', splice @char, $i+1, 2;
8560							}
8561							elsif (($i+1 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, $char[$i+1 ]) == 1) and (CORE::eval(sprintf '"%s%s"', @char[$i..$i+1]) =~ /\A $q_char \z/oxms)) {
8562	0					0	$char[$i] .= join '', splice @char, $i+1, 1;
8563							}
8564							}
8565
8566							# open character class [...]
8567							elsif ($char[$i] eq '[') {
8568	3					4	my $left = $i;
8569	3	50				13	if ($char[$i+1] eq ']') {
8570	0					0	$i++;
8571							}
8572	3					3	while (1) {
8573	7	50				11	if (++$i > $#char) {
8574	0					0	die __FILE__, ": Unmatched [] in regexp\n";
8575							}
8576	7	100				14	if ($char[$i] eq ']') {
8577	3					4	my $right = $i;
8578
8579							# [...]
8580	3	50				23	if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
8581	0					0	splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0					0
8582							}
8583							else {
8584	3					16	splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
8585							}
8586
8587	3					4	$i = $left;
8588	3					8	last;
8589							}
8590							}
8591							}
8592
8593							# open character class [^...]
8594							elsif ($char[$i] eq '[^') {
8595	1					2	my $left = $i;
8596	1	50				4	if ($char[$i+1] eq ']') {
8597	0					0	$i++;
8598							}
8599	1					2	while (1) {
8600	2	50				4	if (++$i > $#char) {
8601	0					0	die __FILE__, ": Unmatched [] in regexp\n";
8602							}
8603	2	100				5	if ($char[$i] eq ']') {
8604	1					1	my $right = $i;
8605
8606							# [^...]
8607	1	50				9	if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
8608	0					0	splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0					0
8609							}
8610							else {
8611	1					7	splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
8612							}
8613
8614	1					2	$i = $left;
8615	1					3	last;
8616							}
8617							}
8618							}
8619
8620							# rewrite character class or escape character
8621							elsif (my $char = character_class($char[$i],$modifier)) {
8622	5					20	$char[$i] = $char;
8623							}
8624
8625							# P.794 29.2.161. split
8626							# in Chapter 29: Functions
8627							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
8628
8629							# P.951 split
8630							# in Chapter 27: Functions
8631							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
8632
8633							# said "The //m modifier is assumed when you split on the pattern /^/",
8634							# but perl5.008 is not so. Therefore, this software adds //m.
8635							# (and so on)
8636
8637							# split(m/^/) --> split(m/^/m)
8638							elsif (($char[$i] eq '^') and ($modifier !~ /m/oxms)) {
8639	11					75	$modifier .= 'm';
8640							}
8641
8642							# /i modifier
8643							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
8644	0	0				0	if (CORE::length(Eutf2::fc($char[$i])) == 1) {
8645	0					0	$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
8646							}
8647							else {
8648	0					0	$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
8649							}
8650							}
8651
8652							# \u \l \U \L \F \Q \E
8653							elsif ($char[$i] =~ /\A ([<>]) \z/oxms) {
8654	2	50				9	if ($right_e < $left_e) {
8655	0					0	$char[$i] = '\\' . $char[$i];
8656							}
8657							}
8658							elsif ($char[$i] eq '\u') {
8659	0					0	$char[$i] = '@{[Eutf2::ucfirst qq<';
8660	0					0	$left_e++;
8661							}
8662							elsif ($char[$i] eq '\l') {
8663	0					0	$char[$i] = '@{[Eutf2::lcfirst qq<';
8664	0					0	$left_e++;
8665							}
8666							elsif ($char[$i] eq '\U') {
8667	0					0	$char[$i] = '@{[Eutf2::uc qq<';
8668	0					0	$left_e++;
8669							}
8670							elsif ($char[$i] eq '\L') {
8671	0					0	$char[$i] = '@{[Eutf2::lc qq<';
8672	0					0	$left_e++;
8673							}
8674							elsif ($char[$i] eq '\F') {
8675	0					0	$char[$i] = '@{[Eutf2::fc qq<';
8676	0					0	$left_e++;
8677							}
8678							elsif ($char[$i] eq '\Q') {
8679	0					0	$char[$i] = '@{[CORE::quotemeta qq<';
8680	0					0	$left_e++;
8681							}
8682							elsif ($char[$i] eq '\E') {
8683	0	0				0	if ($right_e < $left_e) {
8684	0					0	$char[$i] = '>]}';
8685	0					0	$right_e++;
8686							}
8687							else {
8688	0					0	$char[$i] = '';
8689							}
8690							}
8691							elsif ($char[$i] eq '\Q') {
8692	0					0	while (1) {
8693	0	0				0	if (++$i > $#char) {
8694	0					0	last;
8695							}
8696	0	0				0	if ($char[$i] eq '\E') {
8697	0					0	last;
8698							}
8699							}
8700							}
8701							elsif ($char[$i] eq '\E') {
8702							}
8703
8704							# $0 --> $0
8705							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
8706	0	0				0	if ($ignorecase) {
8707	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8708							}
8709							}
8710							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
8711	0	0				0	if ($ignorecase) {
8712	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8713							}
8714							}
8715
8716							# $$ --> $$
8717							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
8718							}
8719
8720							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
8721							# $1, $2, $3 --> $1, $2, $3 otherwise
8722							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
8723	0					0	$char[$i] = e_capture($1);
8724	0	0				0	if ($ignorecase) {
8725	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8726							}
8727							}
8728							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
8729	0					0	$char[$i] = e_capture($1);
8730	0	0				0	if ($ignorecase) {
8731	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8732							}
8733							}
8734
8735							# $$foo[ ... ] --> $ $foo->[ ... ]
8736							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
8737	0					0	$char[$i] = e_capture($1.'->'.$2);
8738	0	0				0	if ($ignorecase) {
8739	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8740							}
8741							}
8742
8743							# $$foo{ ... } --> $ $foo->{ ... }
8744							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
8745	0					0	$char[$i] = e_capture($1.'->'.$2);
8746	0	0				0	if ($ignorecase) {
8747	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8748							}
8749							}
8750
8751							# $$foo
8752							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
8753	0					0	$char[$i] = e_capture($1);
8754	0	0				0	if ($ignorecase) {
8755	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8756							}
8757							}
8758
8759							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
8760							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
8761	12	50				18	if ($ignorecase) {
8762	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::PREMATCH())]}';
8763							}
8764							else {
8765	12					81	$char[$i] = '@{[Eutf2::PREMATCH()]}';
8766							}
8767							}
8768
8769							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
8770							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
8771	12	50				21	if ($ignorecase) {
8772	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::MATCH())]}';
8773							}
8774							else {
8775	12					86	$char[$i] = '@{[Eutf2::MATCH()]}';
8776							}
8777							}
8778
8779							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
8780							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
8781	9	50				15	if ($ignorecase) {
8782	0					0	$char[$i] = '@{[Eutf2::ignorecase(Eutf2::POSTMATCH())]}';
8783							}
8784							else {
8785	9					63	$char[$i] = '@{[Eutf2::POSTMATCH()]}';
8786							}
8787							}
8788
8789							# ${ foo }
8790							elsif ($char[$i] =~ /\A \$ (?>\s) \{ ((?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* )) \} \z/oxms) {
8791	0	0				0	if ($ignorecase) {
8792	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $1 . ')]}';
8793							}
8794							}
8795
8796							# ${ ... }
8797							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
8798	0					0	$char[$i] = e_capture($1);
8799	0	0				0	if ($ignorecase) {
8800	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8801							}
8802							}
8803
8804							# $scalar or @array
8805							elsif ($char[$i] =~ /\A [\$\@].+ /oxms) {
8806	3					16	$char[$i] = e_string($char[$i]);
8807	3	50				21	if ($ignorecase) {
8808	0					0	$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8809							}
8810							}
8811
8812							# quote character before ? + * {
8813							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
8814	7	100				45	if ($char[$i-1] =~ /\A (?:[\x00-\xFF]\|\\[0-7]{2,3}\|\\x[0-9-A-Fa-f]{1,2}) \z/oxms) {
8815							}
8816							else {
8817	4					31	$char[$i-1] = '(?:' . $char[$i-1] . ')';
8818							}
8819							}
8820							}
8821
8822							# make regexp string
8823	101					128	$modifier =~ tr/i//d;
8824	101	50				200	if ($left_e > $right_e) {
8825	0					0	return join '', 'Eutf2::split', $ope, $delimiter, @char, '>]}' x ($left_e - $right_e), $end_delimiter, $modifier;
8826							}
8827	101					1270	return join '', 'Eutf2::split', $ope, $delimiter, @char, $end_delimiter, $modifier;
8828							}
8829
8830							#
8831							# escape regexp of split qr''
8832							#
8833							sub e_split_q {
8834	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8835	0		0				$modifier \|\|= '';
8836
8837	0						$modifier =~ tr/p//d;
8838	0	0					if ($modifier =~ /([adlu])/oxms) {
8839	0						my $line = 0;
8840	0						for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
8841	0	0					if ($filename ne __FILE__) {
8842	0						$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
8843	0						last;
8844							}
8845							}
8846	0						die qq{Unsupported modifier "$1" used at line $line.\n};
8847							}
8848
8849	0						$slash = 'div';
8850
8851							# /b /B modifier
8852	0	0					if ($modifier =~ tr/bB//d) {
8853	0						return join '', 'split', $ope, $delimiter, $string, $end_delimiter, $modifier;
8854							}
8855
8856	0	0					my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
8857
8858							# split regexp
8859	0						my @char = $string =~ /\G((?>
8860							[^\x80-\xFF\\\[] \|
8861							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
8862							\[\^ \|
8863							\[\: (?>[a-z]+) \:\] \|
8864							\[\:\^ (?>[a-z]+) \:\] \|
8865							\\ (?:$q_char) \|
8866							(?:$q_char)
8867							))/oxmsg;
8868
8869							# unescape character
8870	0						for (my $i=0; $i <= $#char; $i++) {
8871	0	0	0				if (0) {
		0	0
		0	0
		0	0
		0
		0
8872							}
8873
8874							# open character class [...]
8875	0						elsif ($char[$i] eq '[') {
8876	0						my $left = $i;
8877	0	0					if ($char[$i+1] eq ']') {
8878	0						$i++;
8879							}
8880	0						while (1) {
8881	0	0					if (++$i > $#char) {
8882	0						die __FILE__, ": Unmatched [] in regexp\n";
8883							}
8884	0	0					if ($char[$i] eq ']') {
8885	0						my $right = $i;
8886
8887							# [...]
8888	0						splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
8889
8890	0						$i = $left;
8891	0						last;
8892							}
8893							}
8894							}
8895
8896							# open character class [^...]
8897							elsif ($char[$i] eq '[^') {
8898	0						my $left = $i;
8899	0	0					if ($char[$i+1] eq ']') {
8900	0						$i++;
8901							}
8902	0						while (1) {
8903	0	0					if (++$i > $#char) {
8904	0						die __FILE__, ": Unmatched [] in regexp\n";
8905							}
8906	0	0					if ($char[$i] eq ']') {
8907	0						my $right = $i;
8908
8909							# [^...]
8910	0						splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
8911
8912	0						$i = $left;
8913	0						last;
8914							}
8915							}
8916							}
8917
8918							# rewrite character class or escape character
8919							elsif (my $char = character_class($char[$i],$modifier)) {
8920	0						$char[$i] = $char;
8921							}
8922
8923							# split(m/^/) --> split(m/^/m)
8924							elsif (($char[$i] eq '^') and ($modifier !~ /m/oxms)) {
8925	0						$modifier .= 'm';
8926							}
8927
8928							# /i modifier
8929							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
8930	0	0					if (CORE::length(Eutf2::fc($char[$i])) == 1) {
8931	0						$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
8932							}
8933							else {
8934	0						$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
8935							}
8936							}
8937
8938							# quote character before ? + * {
8939							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
8940	0	0					if ($char[$i-1] =~ /\A [\x00-\xFF] \z/oxms) {
8941							}
8942							else {
8943	0						$char[$i-1] = '(?:' . $char[$i-1] . ')';
8944							}
8945							}
8946							}
8947
8948	0						$modifier =~ tr/i//d;
8949	0						return join '', 'Eutf2::split', $ope, $delimiter, @char, $end_delimiter, $modifier;
8950							}
8951
8952							#
8953							# instead of Carp::carp
8954							#
8955							sub carp {
8956	0			0	0		my($package,$filename,$line) = caller(1);
8957	0						print STDERR "@_ at $filename line $line.\n";
8958							}
8959
8960							#
8961							# instead of Carp::croak
8962							#
8963							sub croak {
8964	0			0	0		my($package,$filename,$line) = caller(1);
8965	0						print STDERR "@_ at $filename line $line.\n";
8966	0						die "\n";
8967							}
8968
8969							#
8970							# instead of Carp::cluck
8971							#
8972							sub cluck {
8973	0			0	0		my $i = 0;
8974	0						my @cluck = ();
8975	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
8976	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
8977	0						$i++;
8978							}
8979	0						print STDERR CORE::reverse @cluck;
8980	0						print STDERR "\n";
8981	0						carp @_;
8982							}
8983
8984							#
8985							# instead of Carp::confess
8986							#
8987							sub confess {
8988	0			0	0		my $i = 0;
8989	0						my @confess = ();
8990	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
8991	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
8992	0						$i++;
8993							}
8994	0						print STDERR CORE::reverse @confess;
8995	0						print STDERR "\n";
8996	0						croak @_;
8997							}
8998
8999							1;
9000
9001							__END__