File Coverage

Char/Ekoi8u.pm

Criterion	Covered	Total	%
statement	51	954	5.3
branch	4	562	0.7
condition	1	180	0.5
subroutine	20	85	23.5
pod	7	50	14.0
total	83	1831	4.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# This file is * FOR CPAN USE ONLY *.
3							#
4							package Char::Ekoi8u;
5							######################################################################
6							#
7							# Char::Ekoi8u - Run-time routines for Char/KOI8U.pm
8							#
9							# http://search.cpan.org/dist/Char-KOI8U/
10							#
11							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 INABA Hitoshi
12							######################################################################
13
14	177			177		4905	use 5.00503; # Galapagos Consensus 1998 for primetools
	177					947
	177					9731
15							# use 5.008001; # Lancaster Consensus 2013 for toolchains
16
17							# 12.3. Delaying use Until Runtime
18							# in Chapter 12. Packages, Libraries, and Modules
19							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
20							# (and so on)
21
22	177			177		13833	BEGIN { eval q{ use vars qw($VERSION) } }
	177			177		1406
	177					475
	177					54458
23							$VERSION = sprintf '%d.%02d', q$Revision: 1.00 $ =~ /(\d+)/xmsg;
24
25							BEGIN {
26	177	50		177		1373	if ($^X =~ / jperl /oxmsi) {
27	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)";
28							}
29	177					326	if (CORE::ord('A') == 193) {
30							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).";
31							}
32	177					38746	if (CORE::ord('A') != 0x41) {
33							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).";
34							}
35							}
36
37							BEGIN {
38
39							# instead of utf8.pm
40	177			177		14916	eval q{
	177			177		1567
	177			69		457
	177					34338
	69					15486
	59					12278
	65					31399
	54					10583
	50					9745
	57					12230
41							no warnings qw(redefine);
42							*utf8::upgrade = sub { CORE::length $_[0] };
43							*utf8::downgrade = sub { 1 };
44							*utf8::encode = sub { };
45							*utf8::decode = sub { 1 };
46							*utf8::is_utf8 = sub { };
47							*utf8::valid = sub { 1 };
48							};
49	177	50				172402	if ($@) {
50	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
51	0					0	*utf8::downgrade = sub { 1 };
	0					0
52	0					0	*utf8::encode = sub { };
	0					0
53	0					0	*utf8::decode = sub { 1 };
	0					0
54	0					0	*utf8::is_utf8 = sub { };
	0					0
55	0					0	*utf8::valid = sub { 1 };
	0					0
56							}
57							}
58
59							# instead of Symbol.pm
60							BEGIN {
61	177			177		498	my $genpkg = "Symbol::";
62	177					11784	my $genseq = 0;
63
64							sub gensym () {
65	0			0	0	0	my $name = "GEN" . $genseq++;
66
67							# here, no strict qw(refs); if strict.pm exists
68
69	0					0	my $ref = \*{$genpkg . $name};
	0					0
70	0					0	delete $$genpkg{$name};
71	0					0	return $ref;
72							}
73
74							sub qualify ($;$) {
75	0			0	0	0	my ($name) = @_;
76	0	0	0			0	if (!ref($name) && (Char::Ekoi8u::index($name, '::') == -1) && (Char::Ekoi8u::index($name, "'") == -1)) {
			0
77	0					0	my $pkg;
78	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
79
80							# Global names: special character, "^xyz", or other.
81	0	0	0			0	if ($name =~ /^(([^a-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
82							# RGS 2001-11-05 : translate leading ^X to control-char
83	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
84	0					0	$pkg = "main";
85							}
86							else {
87	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
88							}
89	0					0	$name = $pkg . "::" . $name;
90							}
91	0					0	return $name;
92							}
93
94							sub qualify_to_ref ($;$) {
95
96							# here, no strict qw(refs); if strict.pm exists
97
98	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
99							}
100							}
101
102							# Column: local $@
103							# in Chapter 9. Osaete okitai Perl no kiso
104							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
105							# (and so on)
106
107							# use strict; if strict.pm exists
108							BEGIN {
109	177	50		177		477	if (eval { local $@; CORE::require strict }) {
	177					425
	177					2241
110	177					33224	strict::->import;
111							}
112							}
113
114							# P.714 29.2.39. flock
115							# in Chapter 29: Functions
116							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
117
118							# P.863 flock
119							# in Chapter 27: Functions
120							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
121
122							sub LOCK_SH() {1}
123							sub LOCK_EX() {2}
124							sub LOCK_UN() {8}
125							sub LOCK_NB() {4}
126
127							# instead of Carp.pm
128							sub carp;
129							sub croak;
130							sub cluck;
131							sub confess;
132
133							my $your_char = q{[\x00-\xFF]};
134
135							# regexp of character
136	177			177		13528	BEGIN { eval q{ use vars qw($q_char) } }
	177			177		1159
	177					340
	177					22135
137							$q_char = qr/$your_char/oxms;
138
139							#
140							# KOI8-U character range per length
141							#
142							my %range_tr = ();
143
144							#
145							# alias of encoding name
146							#
147	177			177		11416	BEGIN { eval q{ use vars qw($encoding_alias) } }
	177			177		989
	177					658
	177					517616
148
149							#
150							# KOI8-U case conversion
151							#
152							my %lc = ();
153							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
154							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
155							my %uc = ();
156							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
157							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
158							my %fc = ();
159							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
160							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
161
162							if (0) {
163							}
164
165							elsif (__PACKAGE__ =~ / \b Ekoi8u \z/oxms) {
166							%range_tr = (
167							1 => [ [0x00..0xFF],
168							],
169							);
170							$encoding_alias = qr/ \b (?: koi8-?u ) \b /oxmsi;
171
172							%lc = (%lc,
173							"\xB3" => "\xA3", # CYRILLIC LETTER IO
174							"\xB4" => "\xA4", # CYRILLIC LETTER UKRAINIAN IE
175							"\xB6" => "\xA6", # CYRILLIC LETTER BYELORUSSIAN-UKRAINIAN I
176							"\xB7" => "\xA7", # CYRILLIC LETTER YI (UKRAINIAN)
177							"\xBD" => "\xAD", # CYRILLIC LETTER GHE WITH UPTURN
178							"\xE0" => "\xC0", # CYRILLIC LETTER YU
179							"\xE1" => "\xC1", # CYRILLIC LETTER A
180							"\xE2" => "\xC2", # CYRILLIC LETTER BE
181							"\xE3" => "\xC3", # CYRILLIC LETTER TSE
182							"\xE4" => "\xC4", # CYRILLIC LETTER DE
183							"\xE5" => "\xC5", # CYRILLIC LETTER IE
184							"\xE6" => "\xC6", # CYRILLIC LETTER EF
185							"\xE7" => "\xC7", # CYRILLIC LETTER GHE
186							"\xE8" => "\xC8", # CYRILLIC LETTER KHA
187							"\xE9" => "\xC9", # CYRILLIC LETTER I
188							"\xEA" => "\xCA", # CYRILLIC LETTER SHORT I
189							"\xEB" => "\xCB", # CYRILLIC LETTER KA
190							"\xEC" => "\xCC", # CYRILLIC LETTER EL
191							"\xED" => "\xCD", # CYRILLIC LETTER EM
192							"\xEE" => "\xCE", # CYRILLIC LETTER EN
193							"\xEF" => "\xCF", # CYRILLIC LETTER O
194							"\xF0" => "\xD0", # CYRILLIC LETTER PE
195							"\xF1" => "\xD1", # CYRILLIC LETTER YA
196							"\xF2" => "\xD2", # CYRILLIC LETTER ER
197							"\xF3" => "\xD3", # CYRILLIC LETTER ES
198							"\xF4" => "\xD4", # CYRILLIC LETTER TE
199							"\xF5" => "\xD5", # CYRILLIC LETTER U
200							"\xF6" => "\xD6", # CYRILLIC LETTER ZHE
201							"\xF7" => "\xD7", # CYRILLIC LETTER VE
202							"\xF8" => "\xD8", # CYRILLIC LETTER SOFT SIGN
203							"\xF9" => "\xD9", # CYRILLIC LETTER YERU
204							"\xFA" => "\xDA", # CYRILLIC LETTER ZE
205							"\xFB" => "\xDB", # CYRILLIC LETTER SHA
206							"\xFC" => "\xDC", # CYRILLIC LETTER E
207							"\xFD" => "\xDD", # CYRILLIC LETTER SHCHA
208							"\xFE" => "\xDE", # CYRILLIC LETTER CHE
209							"\xFF" => "\xDF", # CYRILLIC LETTER HARD SIGN
210							);
211
212							%uc = (%uc,
213							"\xA3" => "\xB3", # CYRILLIC LETTER IO
214							"\xA4" => "\xB4", # CYRILLIC LETTER UKRAINIAN IE
215							"\xA6" => "\xB6", # CYRILLIC LETTER BYELORUSSIAN-UKRAINIAN I
216							"\xA7" => "\xB7", # CYRILLIC LETTER YI (UKRAINIAN)
217							"\xAD" => "\xBD", # CYRILLIC LETTER GHE WITH UPTURN
218							"\xC0" => "\xE0", # CYRILLIC LETTER YU
219							"\xC1" => "\xE1", # CYRILLIC LETTER A
220							"\xC2" => "\xE2", # CYRILLIC LETTER BE
221							"\xC3" => "\xE3", # CYRILLIC LETTER TSE
222							"\xC4" => "\xE4", # CYRILLIC LETTER DE
223							"\xC5" => "\xE5", # CYRILLIC LETTER IE
224							"\xC6" => "\xE6", # CYRILLIC LETTER EF
225							"\xC7" => "\xE7", # CYRILLIC LETTER GHE
226							"\xC8" => "\xE8", # CYRILLIC LETTER KHA
227							"\xC9" => "\xE9", # CYRILLIC LETTER I
228							"\xCA" => "\xEA", # CYRILLIC LETTER SHORT I
229							"\xCB" => "\xEB", # CYRILLIC LETTER KA
230							"\xCC" => "\xEC", # CYRILLIC LETTER EL
231							"\xCD" => "\xED", # CYRILLIC LETTER EM
232							"\xCE" => "\xEE", # CYRILLIC LETTER EN
233							"\xCF" => "\xEF", # CYRILLIC LETTER O
234							"\xD0" => "\xF0", # CYRILLIC LETTER PE
235							"\xD1" => "\xF1", # CYRILLIC LETTER YA
236							"\xD2" => "\xF2", # CYRILLIC LETTER ER
237							"\xD3" => "\xF3", # CYRILLIC LETTER ES
238							"\xD4" => "\xF4", # CYRILLIC LETTER TE
239							"\xD5" => "\xF5", # CYRILLIC LETTER U
240							"\xD6" => "\xF6", # CYRILLIC LETTER ZHE
241							"\xD7" => "\xF7", # CYRILLIC LETTER VE
242							"\xD8" => "\xF8", # CYRILLIC LETTER SOFT SIGN
243							"\xD9" => "\xF9", # CYRILLIC LETTER YERU
244							"\xDA" => "\xFA", # CYRILLIC LETTER ZE
245							"\xDB" => "\xFB", # CYRILLIC LETTER SHA
246							"\xDC" => "\xFC", # CYRILLIC LETTER E
247							"\xDD" => "\xFD", # CYRILLIC LETTER SHCHA
248							"\xDE" => "\xFE", # CYRILLIC LETTER CHE
249							"\xDF" => "\xFF", # CYRILLIC LETTER HARD SIGN
250							);
251
252							%fc = (%fc,
253							"\xB3" => "\xA3", # CYRILLIC CAPITAL LETTER IO --> CYRILLIC SMALL LETTER IO
254							"\xB4" => "\xA4", # CYRILLIC CAPITAL LETTER UKRAINIAN IE --> CYRILLIC SMALL LETTER UKRAINIAN IE
255							"\xB6" => "\xA6", # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I --> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
256							"\xB7" => "\xA7", # CYRILLIC CAPITAL LETTER YI --> CYRILLIC SMALL LETTER YI
257							"\xBD" => "\xAD", # CYRILLIC CAPITAL LETTER GHE WITH UPTURN --> CYRILLIC SMALL LETTER GHE WITH UPTURN
258							"\xE0" => "\xC0", # CYRILLIC CAPITAL LETTER YU --> CYRILLIC SMALL LETTER YU
259							"\xE1" => "\xC1", # CYRILLIC CAPITAL LETTER A --> CYRILLIC SMALL LETTER A
260							"\xE2" => "\xC2", # CYRILLIC CAPITAL LETTER BE --> CYRILLIC SMALL LETTER BE
261							"\xE3" => "\xC3", # CYRILLIC CAPITAL LETTER TSE --> CYRILLIC SMALL LETTER TSE
262							"\xE4" => "\xC4", # CYRILLIC CAPITAL LETTER DE --> CYRILLIC SMALL LETTER DE
263							"\xE5" => "\xC5", # CYRILLIC CAPITAL LETTER IE --> CYRILLIC SMALL LETTER IE
264							"\xE6" => "\xC6", # CYRILLIC CAPITAL LETTER EF --> CYRILLIC SMALL LETTER EF
265							"\xE7" => "\xC7", # CYRILLIC CAPITAL LETTER GHE --> CYRILLIC SMALL LETTER GHE
266							"\xE8" => "\xC8", # CYRILLIC CAPITAL LETTER HA --> CYRILLIC SMALL LETTER HA
267							"\xE9" => "\xC9", # CYRILLIC CAPITAL LETTER I --> CYRILLIC SMALL LETTER I
268							"\xEA" => "\xCA", # CYRILLIC CAPITAL LETTER SHORT I --> CYRILLIC SMALL LETTER SHORT I
269							"\xEB" => "\xCB", # CYRILLIC CAPITAL LETTER KA --> CYRILLIC SMALL LETTER KA
270							"\xEC" => "\xCC", # CYRILLIC CAPITAL LETTER EL --> CYRILLIC SMALL LETTER EL
271							"\xED" => "\xCD", # CYRILLIC CAPITAL LETTER EM --> CYRILLIC SMALL LETTER EM
272							"\xEE" => "\xCE", # CYRILLIC CAPITAL LETTER EN --> CYRILLIC SMALL LETTER EN
273							"\xEF" => "\xCF", # CYRILLIC CAPITAL LETTER O --> CYRILLIC SMALL LETTER O
274							"\xF0" => "\xD0", # CYRILLIC CAPITAL LETTER PE --> CYRILLIC SMALL LETTER PE
275							"\xF1" => "\xD1", # CYRILLIC CAPITAL LETTER YA --> CYRILLIC SMALL LETTER YA
276							"\xF2" => "\xD2", # CYRILLIC CAPITAL LETTER ER --> CYRILLIC SMALL LETTER ER
277							"\xF3" => "\xD3", # CYRILLIC CAPITAL LETTER ES --> CYRILLIC SMALL LETTER ES
278							"\xF4" => "\xD4", # CYRILLIC CAPITAL LETTER TE --> CYRILLIC SMALL LETTER TE
279							"\xF5" => "\xD5", # CYRILLIC CAPITAL LETTER U --> CYRILLIC SMALL LETTER U
280							"\xF6" => "\xD6", # CYRILLIC CAPITAL LETTER ZHE --> CYRILLIC SMALL LETTER ZHE
281							"\xF7" => "\xD7", # CYRILLIC CAPITAL LETTER VE --> CYRILLIC SMALL LETTER VE
282							"\xF8" => "\xD8", # CYRILLIC CAPITAL LETTER SOFT SIGN --> CYRILLIC SMALL LETTER SOFT SIGN
283							"\xF9" => "\xD9", # CYRILLIC CAPITAL LETTER YERU --> CYRILLIC SMALL LETTER YERU
284							"\xFA" => "\xDA", # CYRILLIC CAPITAL LETTER ZE --> CYRILLIC SMALL LETTER ZE
285							"\xFB" => "\xDB", # CYRILLIC CAPITAL LETTER SHA --> CYRILLIC SMALL LETTER SHA
286							"\xFC" => "\xDC", # CYRILLIC CAPITAL LETTER E --> CYRILLIC SMALL LETTER E
287							"\xFD" => "\xDD", # CYRILLIC CAPITAL LETTER SHCHA --> CYRILLIC SMALL LETTER SHCHA
288							"\xFE" => "\xDE", # CYRILLIC CAPITAL LETTER CHE --> CYRILLIC SMALL LETTER CHE
289							"\xFF" => "\xDF", # CYRILLIC CAPITAL LETTER HARD SIGN --> CYRILLIC SMALL LETTER HARD SIGN
290							);
291							}
292
293							else {
294							croak "Don't know my package name '@{[__PACKAGE__]}'";
295							}
296
297							#
298							# @ARGV wildcard globbing
299							#
300							sub import {
301
302	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
303	0					0	my @argv = ();
304	0					0	for (@ARGV) {
305
306							# has space
307	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
308	0	0				0	if (my @glob = Char::Ekoi8u::glob(qq{"$_"})) {
309	0					0	push @argv, @glob;
310							}
311							else {
312	0					0	push @argv, $_;
313							}
314							}
315
316							# has wildcard metachar
317							elsif (/\A (?:$q_char)? [?] /oxms) {
318	0	0				0	if (my @glob = Char::Ekoi8u::glob($_)) {
319	0					0	push @argv, @glob;
320							}
321							else {
322	0					0	push @argv, $_;
323							}
324							}
325
326							# no wildcard globbing
327							else {
328	0					0	push @argv, $_;
329							}
330							}
331	0					0	@ARGV = @argv;
332							}
333							}
334
335							# P.230 Care with Prototypes
336							# in Chapter 6: Subroutines
337							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
338							#
339							# If you aren't careful, you can get yourself into trouble with prototypes.
340							# But if you are careful, you can do a lot of neat things with them. This is
341							# all very powerful, of course, and should only be used in moderation to make
342							# the world a better place.
343
344							# P.332 Care with Prototypes
345							# in Chapter 7: Subroutines
346							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
347							#
348							# If you aren't careful, you can get yourself into trouble with prototypes.
349							# But if you are careful, you can do a lot of neat things with them. This is
350							# all very powerful, of course, and should only be used in moderation to make
351							# the world a better place.
352
353							#
354							# Prototypes of subroutines
355							#
356	0			0		0	sub unimport {}
357							sub Char::Ekoi8u::split(;$$$);
358							sub Char::Ekoi8u::tr($$$$;$);
359							sub Char::Ekoi8u::chop(@);
360							sub Char::Ekoi8u::index($$;$);
361							sub Char::Ekoi8u::rindex($$;$);
362							sub Char::Ekoi8u::lcfirst(@);
363							sub Char::Ekoi8u::lcfirst_();
364							sub Char::Ekoi8u::lc(@);
365							sub Char::Ekoi8u::lc_();
366							sub Char::Ekoi8u::ucfirst(@);
367							sub Char::Ekoi8u::ucfirst_();
368							sub Char::Ekoi8u::uc(@);
369							sub Char::Ekoi8u::uc_();
370							sub Char::Ekoi8u::fc(@);
371							sub Char::Ekoi8u::fc_();
372							sub Char::Ekoi8u::ignorecase;
373							sub Char::Ekoi8u::classic_character_class;
374							sub Char::Ekoi8u::capture;
375							sub Char::Ekoi8u::chr(;$);
376							sub Char::Ekoi8u::chr_();
377							sub Char::Ekoi8u::glob($);
378							sub Char::Ekoi8u::glob_();
379
380							sub Char::KOI8U::ord(;$);
381							sub Char::KOI8U::ord_();
382							sub Char::KOI8U::reverse(@);
383							sub Char::KOI8U::getc(;*@);
384							sub Char::KOI8U::length(;$);
385							sub Char::KOI8U::substr($$;$$);
386							sub Char::KOI8U::index($$;$);
387							sub Char::KOI8U::rindex($$;$);
388
389							#
390							# Regexp work
391							#
392	177			177		22012	BEGIN { eval q{ use vars qw(
	177			177		1392
	177					360
	177					96574
393							$Char::KOI8U::re_a
394							$Char::KOI8U::re_t
395							$Char::KOI8U::re_n
396							$Char::KOI8U::re_r
397							) } }
398
399							#
400							# Character class
401							#
402	177			177		14381	BEGIN { eval q{ use vars qw(
	177			177		1359
	177					381
	177					4153060
403							$dot
404							$dot_s
405							$eD
406							$eS
407							$eW
408							$eH
409							$eV
410							$eR
411							$eN
412							$not_alnum
413							$not_alpha
414							$not_ascii
415							$not_blank
416							$not_cntrl
417							$not_digit
418							$not_graph
419							$not_lower
420							$not_lower_i
421							$not_print
422							$not_punct
423							$not_space
424							$not_upper
425							$not_upper_i
426							$not_word
427							$not_xdigit
428							$eb
429							$eB
430							) } }
431
432							${Char::Ekoi8u::dot} = qr{(?:[^\x0A])};
433							${Char::Ekoi8u::dot_s} = qr{(?:[\x00-\xFF])};
434							${Char::Ekoi8u::eD} = qr{(?:[^0-9])};
435
436							# Vertical tabs are now whitespace
437							# \s in a regex now matches a vertical tab in all circumstances.
438							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
439							# ${Char::Ekoi8u::eS} = qr{(?:[^\x09\x0A \x0C\x0D\x20])};
440							# ${Char::Ekoi8u::eS} = qr{(?:[^\x09\x0A\x0B\x0C\x0D\x20])};
441							${Char::Ekoi8u::eS} = qr{(?:[^\s])};
442
443							${Char::Ekoi8u::eW} = qr{(?:[^0-9A-Z_a-z])};
444							${Char::Ekoi8u::eH} = qr{(?:[^\x09\x20])};
445							${Char::Ekoi8u::eV} = qr{(?:[^\x0A\x0B\x0C\x0D])};
446							${Char::Ekoi8u::eR} = qr{(?:\x0D\x0A\|[\x0A\x0D])};
447							${Char::Ekoi8u::eN} = qr{(?:[^\x0A])};
448							${Char::Ekoi8u::not_alnum} = qr{(?:[^\x30-\x39\x41-\x5A\x61-\x7A])};
449							${Char::Ekoi8u::not_alpha} = qr{(?:[^\x41-\x5A\x61-\x7A])};
450							${Char::Ekoi8u::not_ascii} = qr{(?:[^\x00-\x7F])};
451							${Char::Ekoi8u::not_blank} = qr{(?:[^\x09\x20])};
452							${Char::Ekoi8u::not_cntrl} = qr{(?:[^\x00-\x1F\x7F])};
453							${Char::Ekoi8u::not_digit} = qr{(?:[^\x30-\x39])};
454							${Char::Ekoi8u::not_graph} = qr{(?:[^\x21-\x7F])};
455							${Char::Ekoi8u::not_lower} = qr{(?:[^\x61-\x7A])};
456							${Char::Ekoi8u::not_lower_i} = qr{(?:[\x00-\xFF])};
457							${Char::Ekoi8u::not_print} = qr{(?:[^\x20-\x7F])};
458							${Char::Ekoi8u::not_punct} = qr{(?:[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
459							${Char::Ekoi8u::not_space} = qr{(?:[^\s\x0B])};
460							${Char::Ekoi8u::not_upper} = qr{(?:[^\x41-\x5A])};
461							${Char::Ekoi8u::not_upper_i} = qr{(?:[\x00-\xFF])};
462							${Char::Ekoi8u::not_word} = qr{(?:[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
463							${Char::Ekoi8u::not_xdigit} = qr{(?:[^\x30-\x39\x41-\x46\x61-\x66])};
464							${Char::Ekoi8u::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
465							${Char::Ekoi8u::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
466
467							# avoid: Name "Char::Ekoi8u::foo" used only once: possible typo at here.
468							${Char::Ekoi8u::dot} = ${Char::Ekoi8u::dot};
469							${Char::Ekoi8u::dot_s} = ${Char::Ekoi8u::dot_s};
470							${Char::Ekoi8u::eD} = ${Char::Ekoi8u::eD};
471							${Char::Ekoi8u::eS} = ${Char::Ekoi8u::eS};
472							${Char::Ekoi8u::eW} = ${Char::Ekoi8u::eW};
473							${Char::Ekoi8u::eH} = ${Char::Ekoi8u::eH};
474							${Char::Ekoi8u::eV} = ${Char::Ekoi8u::eV};
475							${Char::Ekoi8u::eR} = ${Char::Ekoi8u::eR};
476							${Char::Ekoi8u::eN} = ${Char::Ekoi8u::eN};
477							${Char::Ekoi8u::not_alnum} = ${Char::Ekoi8u::not_alnum};
478							${Char::Ekoi8u::not_alpha} = ${Char::Ekoi8u::not_alpha};
479							${Char::Ekoi8u::not_ascii} = ${Char::Ekoi8u::not_ascii};
480							${Char::Ekoi8u::not_blank} = ${Char::Ekoi8u::not_blank};
481							${Char::Ekoi8u::not_cntrl} = ${Char::Ekoi8u::not_cntrl};
482							${Char::Ekoi8u::not_digit} = ${Char::Ekoi8u::not_digit};
483							${Char::Ekoi8u::not_graph} = ${Char::Ekoi8u::not_graph};
484							${Char::Ekoi8u::not_lower} = ${Char::Ekoi8u::not_lower};
485							${Char::Ekoi8u::not_lower_i} = ${Char::Ekoi8u::not_lower_i};
486							${Char::Ekoi8u::not_print} = ${Char::Ekoi8u::not_print};
487							${Char::Ekoi8u::not_punct} = ${Char::Ekoi8u::not_punct};
488							${Char::Ekoi8u::not_space} = ${Char::Ekoi8u::not_space};
489							${Char::Ekoi8u::not_upper} = ${Char::Ekoi8u::not_upper};
490							${Char::Ekoi8u::not_upper_i} = ${Char::Ekoi8u::not_upper_i};
491							${Char::Ekoi8u::not_word} = ${Char::Ekoi8u::not_word};
492							${Char::Ekoi8u::not_xdigit} = ${Char::Ekoi8u::not_xdigit};
493							${Char::Ekoi8u::eb} = ${Char::Ekoi8u::eb};
494							${Char::Ekoi8u::eB} = ${Char::Ekoi8u::eB};
495
496							#
497							# KOI8-U split
498							#
499							sub Char::Ekoi8u::split(;$$$) {
500
501							# P.794 29.2.161. split
502							# in Chapter 29: Functions
503							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
504
505							# P.951 split
506							# in Chapter 27: Functions
507							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
508
509	0			0	0	0	my $pattern = $_[0];
510	0					0	my $string = $_[1];
511	0					0	my $limit = $_[2];
512
513							# if $pattern is also omitted or is the literal space, " "
514	0	0				0	if (not defined $pattern) {
515	0					0	$pattern = ' ';
516							}
517
518							# if $string is omitted, the function splits the $_ string
519	0	0				0	if (not defined $string) {
520	0	0				0	if (defined $_) {
521	0					0	$string = $_;
522							}
523							else {
524	0					0	$string = '';
525							}
526							}
527
528	0					0	my @split = ();
529
530							# when string is empty
531	0	0				0	if ($string eq '') {
		0
532
533							# resulting list value in list context
534	0	0				0	if (wantarray) {
535	0					0	return @split;
536							}
537
538							# count of substrings in scalar context
539							else {
540	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
541	0					0	@_ = @split;
542	0					0	return scalar @_;
543							}
544							}
545
546							# split's first argument is more consistently interpreted
547							#
548							# After some changes earlier in v5.17, split's behavior has been simplified:
549							# if the PATTERN argument evaluates to a string containing one space, it is
550							# treated the way that a literal string containing one space once was.
551							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
552
553							# if $pattern is also omitted or is the literal space, " ", the function splits
554							# on whitespace, /\s+/, after skipping any leading whitespace
555							# (and so on)
556
557							elsif ($pattern eq ' ') {
558	0	0				0	if (not defined $limit) {
559	0					0	return CORE::split(' ', $string);
560							}
561							else {
562	0					0	return CORE::split(' ', $string, $limit);
563							}
564							}
565
566							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
567	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
568
569							# a pattern capable of matching either the null string or something longer than the
570							# null string will split the value of $string into separate characters wherever it
571							# matches the null string between characters
572							# (and so on)
573
574	0	0				0	if ('' =~ / \A $pattern \z /xms) {
575	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
576	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
577
578							# P.1024 Appendix W.10 Multibyte Processing
579							# of ISBN 1-56592-224-7 CJKV Information Processing
580							# (and so on)
581
582							# the //m modifier is assumed when you split on the pattern /^/
583							# (and so on)
584
585							# V
586	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
587
588							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
589							# is included in the resulting list, interspersed with the fields that are ordinarily returned
590							# (and so on)
591
592	0					0	local $@;
593	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
594	0					0	push @split, eval('$' . $digit);
595							}
596							}
597							}
598
599							else {
600	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
601
602							# V
603	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
604	0					0	local $@;
605	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
606	0					0	push @split, eval('$' . $digit);
607							}
608							}
609							}
610							}
611
612							elsif ($limit > 0) {
613	0	0				0	if ('' =~ / \A $pattern \z /xms) {
614	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
615	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
616
617							# V
618	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
619	0					0	local $@;
620	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
621	0					0	push @split, eval('$' . $digit);
622							}
623							}
624							}
625							}
626							else {
627	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
628	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
629
630							# V
631	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
632	0					0	local $@;
633	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
634	0					0	push @split, eval('$' . $digit);
635							}
636							}
637							}
638							}
639							}
640
641	0	0				0	if (CORE::length($string) > 0) {
642	0					0	push @split, $string;
643							}
644
645							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
646	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
647	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
648	0					0	pop @split;
649							}
650							}
651
652							# resulting list value in list context
653	0	0				0	if (wantarray) {
654	0					0	return @split;
655							}
656
657							# count of substrings in scalar context
658							else {
659	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
660	0					0	@_ = @split;
661	0					0	return scalar @_;
662							}
663							}
664
665							#
666							# get last subexpression offsets
667							#
668							sub _last_subexpression_offsets {
669	0			0		0	my $pattern = $_[0];
670
671							# remove comment
672	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
673
674	0					0	my $modifier = '';
675	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
676	0					0	$modifier = $1;
677	0					0	$modifier =~ s/-[A-Za-z]*//;
678							}
679
680							# with /x modifier
681	0					0	my @char = ();
682	0	0				0	if ($modifier =~ /x/oxms) {
683	0					0	@char = $pattern =~ /\G(
684							\\ (?:$q_char) \|
685							\# (?:$q_char)*? $ \|
686							\[ (?: \\\] \| (?:$q_char))+? \] \|
687							\(\? \|
688							(?:$q_char)
689							)/oxmsg;
690							}
691
692							# without /x modifier
693							else {
694	0					0	@char = $pattern =~ /\G(
695							\\ (?:$q_char) \|
696							\[ (?: \\\] \| (?:$q_char))+? \] \|
697							\(\? \|
698							(?:$q_char)
699							)/oxmsg;
700							}
701
702	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
703							}
704
705							#
706							# KOI8-U transliteration (tr///)
707							#
708							sub Char::Ekoi8u::tr($$$$;$) {
709
710	0			0	0	0	my $bind_operator = $_[1];
711	0					0	my $searchlist = $_[2];
712	0					0	my $replacementlist = $_[3];
713	0		0			0	my $modifier = $_[4] \|\| '';
714
715	0	0				0	if ($modifier =~ /r/oxms) {
716	0	0				0	if ($bind_operator =~ / !~ /oxms) {
717	0					0	croak "Using !~ with tr///r doesn't make sense";
718							}
719							}
720
721	0					0	my @char = $_[0] =~ /\G ($q_char) /oxmsg;
722	0					0	my @searchlist = _charlist_tr($searchlist);
723	0					0	my @replacementlist = _charlist_tr($replacementlist);
724
725	0					0	my %tr = ();
726	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
727	0	0				0	if (not exists $tr{$searchlist[$i]}) {
728	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
729	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
730							}
731							elsif ($modifier =~ /d/oxms) {
732	0					0	$tr{$searchlist[$i]} = '';
733							}
734							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
735	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
736							}
737							else {
738	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
739							}
740							}
741							}
742
743	0					0	my $tr = 0;
744	0					0	my $replaced = '';
745	0	0				0	if ($modifier =~ /c/oxms) {
746	0					0	while (defined(my $char = shift @char)) {
747	0	0				0	if (not exists $tr{$char}) {
748	0	0				0	if (defined $replacementlist[0]) {
749	0					0	$replaced .= $replacementlist[0];
750							}
751	0					0	$tr++;
752	0	0				0	if ($modifier =~ /s/oxms) {
753	0		0			0	while (@char and (not exists $tr{$char[0]})) {
754	0					0	shift @char;
755	0					0	$tr++;
756							}
757							}
758							}
759							else {
760	0					0	$replaced .= $char;
761							}
762							}
763							}
764							else {
765	0					0	while (defined(my $char = shift @char)) {
766	0	0				0	if (exists $tr{$char}) {
767	0					0	$replaced .= $tr{$char};
768	0					0	$tr++;
769	0	0				0	if ($modifier =~ /s/oxms) {
770	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
771	0					0	shift @char;
772	0					0	$tr++;
773							}
774							}
775							}
776							else {
777	0					0	$replaced .= $char;
778							}
779							}
780							}
781
782	0	0				0	if ($modifier =~ /r/oxms) {
783	0					0	return $replaced;
784							}
785							else {
786	0					0	$_[0] = $replaced;
787	0	0				0	if ($bind_operator =~ / !~ /oxms) {
788	0					0	return not $tr;
789							}
790							else {
791	0					0	return $tr;
792							}
793							}
794							}
795
796							#
797							# KOI8-U chop
798							#
799							sub Char::Ekoi8u::chop(@) {
800
801	0			0	0	0	my $chop;
802	0	0				0	if (@_ == 0) {
803	0					0	my @char = /\G ($q_char) /oxmsg;
804	0					0	$chop = pop @char;
805	0					0	$_ = join '', @char;
806							}
807							else {
808	0					0	for (@_) {
809	0					0	my @char = /\G ($q_char) /oxmsg;
810	0					0	$chop = pop @char;
811	0					0	$_ = join '', @char;
812							}
813							}
814	0					0	return $chop;
815							}
816
817							#
818							# KOI8-U index by octet
819							#
820							sub Char::Ekoi8u::index($$;$) {
821
822	0			0	1	0	my($str,$substr,$position) = @_;
823	0		0			0	$position \|\|= 0;
824	0					0	my $pos = 0;
825
826	0					0	while ($pos < CORE::length($str)) {
827	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
828	0	0				0	if ($pos >= $position) {
829	0					0	return $pos;
830							}
831							}
832	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
833	0					0	$pos += CORE::length($1);
834							}
835							else {
836	0					0	$pos += 1;
837							}
838							}
839	0					0	return -1;
840							}
841
842							#
843							# KOI8-U reverse index
844							#
845							sub Char::Ekoi8u::rindex($$;$) {
846
847	0			0	0	0	my($str,$substr,$position) = @_;
848	0		0			0	$position \|\|= CORE::length($str) - 1;
849	0					0	my $pos = 0;
850	0					0	my $rindex = -1;
851
852	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
853	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
854	0					0	$rindex = $pos;
855							}
856	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
857	0					0	$pos += CORE::length($1);
858							}
859							else {
860	0					0	$pos += 1;
861							}
862							}
863	0					0	return $rindex;
864							}
865
866							#
867							# KOI8-U lower case first with parameter
868							#
869							sub Char::Ekoi8u::lcfirst(@) {
870	0	0		0	0	0	if (@_) {
871	0					0	my $s = shift @_;
872	0	0	0			0	if (@_ and wantarray) {
873	0					0	return Char::Ekoi8u::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
874							}
875							else {
876	0					0	return Char::Ekoi8u::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
877							}
878							}
879							else {
880	0					0	return Char::Ekoi8u::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
881							}
882							}
883
884							#
885							# KOI8-U lower case first without parameter
886							#
887							sub Char::Ekoi8u::lcfirst_() {
888	0			0	0	0	return Char::Ekoi8u::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
889							}
890
891							#
892							# KOI8-U lower case with parameter
893							#
894							sub Char::Ekoi8u::lc(@) {
895	0	0		0	0	0	if (@_) {
896	0					0	my $s = shift @_;
897	0	0	0			0	if (@_ and wantarray) {
898	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
899							}
900							else {
901	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
902							}
903							}
904							else {
905	0					0	return Char::Ekoi8u::lc_();
906							}
907							}
908
909							#
910							# KOI8-U lower case without parameter
911							#
912							sub Char::Ekoi8u::lc_() {
913	0			0	0	0	my $s = $_;
914	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
915							}
916
917							#
918							# KOI8-U upper case first with parameter
919							#
920							sub Char::Ekoi8u::ucfirst(@) {
921	0	0		0	0	0	if (@_) {
922	0					0	my $s = shift @_;
923	0	0	0			0	if (@_ and wantarray) {
924	0					0	return Char::Ekoi8u::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
925							}
926							else {
927	0					0	return Char::Ekoi8u::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
928							}
929							}
930							else {
931	0					0	return Char::Ekoi8u::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
932							}
933							}
934
935							#
936							# KOI8-U upper case first without parameter
937							#
938							sub Char::Ekoi8u::ucfirst_() {
939	0			0	0	0	return Char::Ekoi8u::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
940							}
941
942							#
943							# KOI8-U upper case with parameter
944							#
945							sub Char::Ekoi8u::uc(@) {
946	0	0		0	0	0	if (@_) {
947	0					0	my $s = shift @_;
948	0	0	0			0	if (@_ and wantarray) {
949	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
950							}
951							else {
952	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
953							}
954							}
955							else {
956	0					0	return Char::Ekoi8u::uc_();
957							}
958							}
959
960							#
961							# KOI8-U upper case without parameter
962							#
963							sub Char::Ekoi8u::uc_() {
964	0			0	0	0	my $s = $_;
965	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
966							}
967
968							#
969							# KOI8-U fold case with parameter
970							#
971							sub Char::Ekoi8u::fc(@) {
972	0	0		0	0	0	if (@_) {
973	0					0	my $s = shift @_;
974	0	0	0			0	if (@_ and wantarray) {
975	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
976							}
977							else {
978	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
979							}
980							}
981							else {
982	0					0	return Char::Ekoi8u::fc_();
983							}
984							}
985
986							#
987							# KOI8-U fold case without parameter
988							#
989							sub Char::Ekoi8u::fc_() {
990	0			0	0	0	my $s = $_;
991	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
992							}
993
994							#
995							# KOI8-U regexp capture
996							#
997							{
998							sub Char::Ekoi8u::capture {
999	0			0	1	0	return $_[0];
1000							}
1001							}
1002
1003							#
1004							# KOI8-U regexp ignore case modifier
1005							#
1006							sub Char::Ekoi8u::ignorecase {
1007
1008	0			0	0	0	my @string = @_;
1009	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
1010
1011							# ignore case of $scalar or @array
1012	0					0	for my $string (@string) {
1013
1014							# split regexp
1015	0					0	my @char = $string =~ /\G(
1016							\[\^ \|
1017							\\? (?:$q_char)
1018							)/oxmsg;
1019
1020							# unescape character
1021	0					0	for (my $i=0; $i <= $#char; $i++) {
1022	0	0				0	next if not defined $char[$i];
1023
1024							# open character class [...]
1025	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
1026	0					0	my $left = $i;
1027
1028							# [] make die "unmatched [] in regexp ..."
1029
1030	0	0				0	if ($char[$i+1] eq ']') {
1031	0					0	$i++;
1032							}
1033
1034	0					0	while (1) {
1035	0	0				0	if (++$i > $#char) {
1036	0					0	croak "Unmatched [] in regexp";
1037							}
1038	0	0				0	if ($char[$i] eq ']') {
1039	0					0	my $right = $i;
1040	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
1041
1042							# escape character
1043	0					0	for my $char (@charlist) {
1044	0	0				0	if (0) {
1045							}
1046
1047	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1048	0					0	$char = $1 . '\\' . $char;
1049							}
1050							}
1051
1052							# [...]
1053	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
1054
1055	0					0	$i = $left;
1056	0					0	last;
1057							}
1058							}
1059							}
1060
1061							# open character class [^...]
1062							elsif ($char[$i] eq '[^') {
1063	0					0	my $left = $i;
1064
1065							# [^] make die "unmatched [] in regexp ..."
1066
1067	0	0				0	if ($char[$i+1] eq ']') {
1068	0					0	$i++;
1069							}
1070
1071	0					0	while (1) {
1072	0	0				0	if (++$i > $#char) {
1073	0					0	croak "Unmatched [] in regexp";
1074							}
1075	0	0				0	if ($char[$i] eq ']') {
1076	0					0	my $right = $i;
1077	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
1078
1079							# escape character
1080	0					0	for my $char (@charlist) {
1081	0	0				0	if (0) {
1082							}
1083
1084	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1085	0					0	$char = '\\' . $char;
1086							}
1087							}
1088
1089							# [^...]
1090	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
1091
1092	0					0	$i = $left;
1093	0					0	last;
1094							}
1095							}
1096							}
1097
1098							# rewrite classic character class or escape character
1099							elsif (my $char = classic_character_class($char[$i])) {
1100	0					0	$char[$i] = $char;
1101							}
1102
1103							# with /i modifier
1104							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1105	0					0	my $uc = Char::Ekoi8u::uc($char[$i]);
1106	0					0	my $fc = Char::Ekoi8u::fc($char[$i]);
1107	0	0				0	if ($uc ne $fc) {
1108	0	0				0	if (CORE::length($fc) == 1) {
1109	0					0	$char[$i] = '[' . $uc . $fc . ']';
1110							}
1111							else {
1112	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
1113							}
1114							}
1115							}
1116							}
1117
1118							# characterize
1119	0					0	for (my $i=0; $i <= $#char; $i++) {
1120	0	0				0	next if not defined $char[$i];
1121
1122	0	0				0	if (0) {
1123							}
1124
1125							# quote character before ? + * {
1126	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
1127	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
1128	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
1129							}
1130							}
1131							}
1132
1133	0					0	$string = join '', @char;
1134							}
1135
1136							# make regexp string
1137	0					0	return @string;
1138							}
1139
1140							#
1141							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
1142							#
1143							sub Char::Ekoi8u::classic_character_class {
1144	0			0	0	0	my($char) = @_;
1145
1146							return {
1147	0		0			0	'\D' => '${Char::Ekoi8u::eD}',
1148							'\S' => '${Char::Ekoi8u::eS}',
1149							'\W' => '${Char::Ekoi8u::eW}',
1150							'\d' => '[0-9]',
1151
1152							# Before Perl 5.6, \s only matched the five whitespace characters
1153							# tab, newline, form-feed, carriage return, and the space character
1154							# itself, which, taken together, is the character class [\t\n\f\r ].
1155
1156							# Vertical tabs are now whitespace
1157							# \s in a regex now matches a vertical tab in all circumstances.
1158							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1159							# \t \n \v \f \r space
1160							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1161							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1162							'\s' => '\s',
1163
1164							'\w' => '[0-9A-Z_a-z]',
1165							'\C' => '[\x00-\xFF]',
1166							'\X' => 'X',
1167
1168							# \h \v \H \V
1169
1170							# P.114 Character Class Shortcuts
1171							# in Chapter 7: In the World of Regular Expressions
1172							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
1173
1174							# P.357 13.2.3 Whitespace
1175							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
1176							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1177							#
1178							# 0x00009 CHARACTER TABULATION h s
1179							# 0x0000a LINE FEED (LF) vs
1180							# 0x0000b LINE TABULATION v
1181							# 0x0000c FORM FEED (FF) vs
1182							# 0x0000d CARRIAGE RETURN (CR) vs
1183							# 0x00020 SPACE h s
1184
1185							# P.196 Table 5-9. Alphanumeric regex metasymbols
1186							# in Chapter 5. Pattern Matching
1187							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1188
1189							# (and so on)
1190
1191							'\H' => '${Char::Ekoi8u::eH}',
1192							'\V' => '${Char::Ekoi8u::eV}',
1193							'\h' => '[\x09\x20]',
1194							'\v' => '[\x0A\x0B\x0C\x0D]',
1195							'\R' => '${Char::Ekoi8u::eR}',
1196
1197							# \N
1198							#
1199							# http://perldoc.perl.org/perlre.html
1200							# Character Classes and other Special Escapes
1201							# Any character but \n (experimental). Not affected by /s modifier
1202
1203							'\N' => '${Char::Ekoi8u::eN}',
1204
1205							# \b \B
1206
1207							# P.180 Boundaries: The \b and \B Assertions
1208							# in Chapter 5: Pattern Matching
1209							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1210
1211							# P.219 Boundaries: The \b and \B Assertions
1212							# in Chapter 5: Pattern Matching
1213							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1214
1215							# '\b' => '(?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))',
1216							'\b' => '${Char::Ekoi8u::eb}',
1217
1218							# '\B' => '(?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))',
1219							'\B' => '${Char::Ekoi8u::eB}',
1220
1221							}->{$char} \|\| '';
1222							}
1223
1224							#
1225							# prepare KOI8-U characters per length
1226							#
1227
1228							# 1 octet characters
1229							my @chars1 = ();
1230							sub chars1 {
1231	0	0		0	0	0	if (@chars1) {
1232	0					0	return @chars1;
1233							}
1234	0	0				0	if (exists $range_tr{1}) {
1235	0					0	my @ranges = @{ $range_tr{1} };
	0					0
1236	0					0	while (my @range = splice(@ranges,0,1)) {
1237	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1238	0					0	push @chars1, pack 'C', $oct0;
1239							}
1240							}
1241							}
1242	0					0	return @chars1;
1243							}
1244
1245							# 2 octets characters
1246							my @chars2 = ();
1247							sub chars2 {
1248	0	0		0	0	0	if (@chars2) {
1249	0					0	return @chars2;
1250							}
1251	0	0				0	if (exists $range_tr{2}) {
1252	0					0	my @ranges = @{ $range_tr{2} };
	0					0
1253	0					0	while (my @range = splice(@ranges,0,2)) {
1254	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1255	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1256	0					0	push @chars2, pack 'CC', $oct0,$oct1;
1257							}
1258							}
1259							}
1260							}
1261	0					0	return @chars2;
1262							}
1263
1264							# 3 octets characters
1265							my @chars3 = ();
1266							sub chars3 {
1267	0	0		0	0	0	if (@chars3) {
1268	0					0	return @chars3;
1269							}
1270	0	0				0	if (exists $range_tr{3}) {
1271	0					0	my @ranges = @{ $range_tr{3} };
	0					0
1272	0					0	while (my @range = splice(@ranges,0,3)) {
1273	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1274	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1275	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1276	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
1277							}
1278							}
1279							}
1280							}
1281							}
1282	0					0	return @chars3;
1283							}
1284
1285							# 4 octets characters
1286							my @chars4 = ();
1287							sub chars4 {
1288	0	0		0	0	0	if (@chars4) {
1289	0					0	return @chars4;
1290							}
1291	0	0				0	if (exists $range_tr{4}) {
1292	0					0	my @ranges = @{ $range_tr{4} };
	0					0
1293	0					0	while (my @range = splice(@ranges,0,4)) {
1294	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1295	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1296	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1297	0					0	for my $oct3 (@{$range[3]}) {
	0					0
1298	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
1299							}
1300							}
1301							}
1302							}
1303							}
1304							}
1305	0					0	return @chars4;
1306							}
1307
1308							#
1309							# KOI8-U open character list for tr
1310							#
1311							sub _charlist_tr {
1312
1313	0			0		0	local $_ = shift @_;
1314
1315							# unescape character
1316	0					0	my @char = ();
1317	0					0	while (not /\G \z/oxmsgc) {
1318	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
1319	0					0	push @char, '\-';
1320							}
1321							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
1322	0					0	push @char, CORE::chr(oct $1);
1323							}
1324							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
1325	0					0	push @char, CORE::chr(hex $1);
1326							}
1327							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
1328	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
1329							}
1330							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
1331	0					0	push @char, {
1332							'\0' => "\0",
1333							'\n' => "\n",
1334							'\r' => "\r",
1335							'\t' => "\t",
1336							'\f' => "\f",
1337							'\b' => "\x08", # \b means backspace in character class
1338							'\a' => "\a",
1339							'\e' => "\e",
1340							}->{$1};
1341							}
1342							elsif (/\G \\ ($q_char) /oxmsgc) {
1343	0					0	push @char, $1;
1344							}
1345							elsif (/\G ($q_char) /oxmsgc) {
1346	0					0	push @char, $1;
1347							}
1348							}
1349
1350							# join separated multiple-octet
1351	0					0	@char = join('',@char) =~ /\G (\\-\|$q_char) /oxmsg;
1352
1353							# unescape '-'
1354	0					0	my @i = ();
1355	0					0	for my $i (0 .. $#char) {
1356	0	0				0	if ($char[$i] eq '\-') {
		0
1357	0					0	$char[$i] = '-';
1358							}
1359							elsif ($char[$i] eq '-') {
1360	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1361	0					0	push @i, $i;
1362							}
1363							}
1364							}
1365
1366							# open character list (reverse for splice)
1367	0					0	for my $i (CORE::reverse @i) {
1368	0					0	my @range = ();
1369
1370							# range error
1371	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
1372	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1373							}
1374
1375							# range of multiple-octet code
1376	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
1377	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
1378	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
1379							}
1380							elsif (CORE::length($char[$i+1]) == 2) {
1381	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1382	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
1383							}
1384							elsif (CORE::length($char[$i+1]) == 3) {
1385	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1386	0					0	push @range, chars2();
1387	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1388							}
1389							elsif (CORE::length($char[$i+1]) == 4) {
1390	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1391	0					0	push @range, chars2();
1392	0					0	push @range, chars3();
1393	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1394							}
1395							else {
1396	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1397							}
1398							}
1399							elsif (CORE::length($char[$i-1]) == 2) {
1400	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
1401	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
1402							}
1403							elsif (CORE::length($char[$i+1]) == 3) {
1404	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1405	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1406							}
1407							elsif (CORE::length($char[$i+1]) == 4) {
1408	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1409	0					0	push @range, chars3();
1410	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1411							}
1412							else {
1413	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1414							}
1415							}
1416							elsif (CORE::length($char[$i-1]) == 3) {
1417	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
1418	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
1419							}
1420							elsif (CORE::length($char[$i+1]) == 4) {
1421	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
1422	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1423							}
1424							else {
1425	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1426							}
1427							}
1428							elsif (CORE::length($char[$i-1]) == 4) {
1429	0	0				0	if (CORE::length($char[$i+1]) == 4) {
1430	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
1431							}
1432							else {
1433	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1434							}
1435							}
1436							else {
1437	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1438							}
1439
1440	0					0	splice @char, $i-1, 3, @range;
1441							}
1442
1443	0					0	return @char;
1444							}
1445
1446							#
1447							# KOI8-U open character class
1448							#
1449							sub _cc {
1450	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
1451	0					0	die __FILE__, ": subroutine cc got no parameter.";
1452							}
1453							elsif (scalar(@_) == 1) {
1454	0					0	return sprintf('\x%02X',$_[0]);
1455							}
1456							elsif (scalar(@_) == 2) {
1457	0	0				0	if ($_[0] > $_[1]) {
		0
		0
1458	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).";
1459							}
1460							elsif ($_[0] == $_[1]) {
1461	0					0	return sprintf('\x%02X',$_[0]);
1462							}
1463							elsif (($_[0]+1) == $_[1]) {
1464	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
1465							}
1466							else {
1467	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
1468							}
1469							}
1470							else {
1471	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).";
	0					0
1472							}
1473							}
1474
1475							#
1476							# KOI8-U octet range
1477							#
1478							sub _octets {
1479	0			0		0	my $length = shift @_;
1480
1481	0	0				0	if ($length == 1) {
1482	0					0	my($a1) = unpack 'C', $_[0];
1483	0					0	my($z1) = unpack 'C', $_[1];
1484
1485	0	0				0	if ($a1 > $z1) {
1486	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
1487							}
1488
1489	0	0				0	if ($a1 == $z1) {
		0
1490	0					0	return sprintf('\x%02X',$a1);
1491							}
1492							elsif (($a1+1) == $z1) {
1493	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
1494							}
1495							else {
1496	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
1497							}
1498							}
1499							else {
1500	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).";
1501							}
1502							}
1503
1504							#
1505							# KOI8-U range regexp
1506							#
1507							sub _range_regexp {
1508	0			0		0	my($length,$first,$last) = @_;
1509
1510	0					0	my @range_regexp = ();
1511	0	0				0	if (not exists $range_tr{$length}) {
1512	0					0	return @range_regexp;
1513							}
1514
1515	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
1516	0					0	while (my @range = splice(@ranges,0,$length)) {
1517	0					0	my $min = '';
1518	0					0	my $max = '';
1519	0					0	for (my $i=0; $i < $length; $i++) {
1520	0					0	$min .= pack 'C', $range[$i][0];
1521	0					0	$max .= pack 'C', $range[$i][-1];
1522							}
1523
1524							# min___max
1525							# FIRST_____________LAST
1526							# (nothing)
1527
1528	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
1529							}
1530
1531							# **********
1532							# min_________max
1533							# FIRST_____________LAST
1534							# **********
1535
1536							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
1537	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
1538							}
1539
1540							# **********************
1541							# min________________max
1542							# FIRST_____________LAST
1543							# **********************
1544
1545							elsif (($min eq $first) and ($max eq $last)) {
1546	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1547							}
1548
1549							# *********
1550							# min___max
1551							# FIRST_____________LAST
1552							# *********
1553
1554							elsif (($first le $min) and ($max le $last)) {
1555	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
1556							}
1557
1558							# **********************
1559							# min__________________________max
1560							# FIRST_____________LAST
1561							# **********************
1562
1563							elsif (($min le $first) and ($last le $max)) {
1564	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1565							}
1566
1567							# *********
1568							# min________max
1569							# FIRST_____________LAST
1570							# *********
1571
1572							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
1573	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
1574							}
1575
1576							# min___max
1577							# FIRST_____________LAST
1578							# (nothing)
1579
1580							elsif ($last lt $min) {
1581							}
1582
1583							else {
1584	0					0	die __FILE__, ": subroutine _range_regexp panic.";
1585							}
1586							}
1587
1588	0					0	return @range_regexp;
1589							}
1590
1591							#
1592							# KOI8-U open character list for qr and not qr
1593							#
1594							sub _charlist {
1595
1596	0			0		0	my $modifier = pop @_;
1597	0					0	my @char = @_;
1598
1599	0	0				0	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
1600
1601							# unescape character
1602	0					0	for (my $i=0; $i <= $#char; $i++) {
1603
1604							# escape - to ...
1605	0	0	0			0	if ($char[$i] eq '-') {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
1606	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1607	0					0	$char[$i] = '...';
1608							}
1609							}
1610
1611							# octal escape sequence
1612							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
1613	0					0	$char[$i] = octchr($1);
1614							}
1615
1616							# hexadecimal escape sequence
1617							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
1618	0					0	$char[$i] = hexchr($1);
1619							}
1620
1621							# \N{CHARNAME} --> N\{CHARNAME}
1622							elsif ($char[$i] =~ /\A \\ (N) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1623	0					0	$char[$i] = $1 . '\\' . $2;
1624							}
1625
1626							# \p{PROPERTY} --> p\{PROPERTY}
1627							elsif ($char[$i] =~ /\A \\ (p) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1628	0					0	$char[$i] = $1 . '\\' . $2;
1629							}
1630
1631							# \P{PROPERTY} --> P\{PROPERTY}
1632							elsif ($char[$i] =~ /\A \\ (P) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1633	0					0	$char[$i] = $1 . '\\' . $2;
1634							}
1635
1636							# \p, \P, \X --> p, P, X
1637							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
1638	0					0	$char[$i] = $1;
1639							}
1640
1641							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
1642	0					0	$char[$i] = CORE::chr oct $1;
1643							}
1644							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
1645	0					0	$char[$i] = CORE::chr hex $1;
1646							}
1647							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
1648	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
1649							}
1650							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
1651	0					0	$char[$i] = {
1652							'\0' => "\0",
1653							'\n' => "\n",
1654							'\r' => "\r",
1655							'\t' => "\t",
1656							'\f' => "\f",
1657							'\b' => "\x08", # \b means backspace in character class
1658							'\a' => "\a",
1659							'\e' => "\e",
1660							'\d' => '[0-9]',
1661
1662							# Vertical tabs are now whitespace
1663							# \s in a regex now matches a vertical tab in all circumstances.
1664							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1665							# \t \n \v \f \r space
1666							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1667							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1668							'\s' => '\s',
1669
1670							'\w' => '[0-9A-Z_a-z]',
1671							'\D' => '${Char::Ekoi8u::eD}',
1672							'\S' => '${Char::Ekoi8u::eS}',
1673							'\W' => '${Char::Ekoi8u::eW}',
1674
1675							'\H' => '${Char::Ekoi8u::eH}',
1676							'\V' => '${Char::Ekoi8u::eV}',
1677							'\h' => '[\x09\x20]',
1678							'\v' => '[\x0A\x0B\x0C\x0D]',
1679							'\R' => '${Char::Ekoi8u::eR}',
1680
1681							}->{$1};
1682							}
1683
1684							# POSIX-style character classes
1685							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
1686	0					0	$char[$i] = {
1687
1688							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
1689							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
1690							'[:^lower:]' => '${Char::Ekoi8u::not_lower_i}',
1691							'[:^upper:]' => '${Char::Ekoi8u::not_upper_i}',
1692
1693							}->{$1};
1694							}
1695							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
1696	0					0	$char[$i] = {
1697
1698							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
1699							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
1700							'[:ascii:]' => '[\x00-\x7F]',
1701							'[:blank:]' => '[\x09\x20]',
1702							'[:cntrl:]' => '[\x00-\x1F\x7F]',
1703							'[:digit:]' => '[\x30-\x39]',
1704							'[:graph:]' => '[\x21-\x7F]',
1705							'[:lower:]' => '[\x61-\x7A]',
1706							'[:print:]' => '[\x20-\x7F]',
1707							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
1708
1709							# P.174 POSIX-Style Character Classes
1710							# in Chapter 5: Pattern Matching
1711							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1712
1713							# P.311 11.2.4 Character Classes and other Special Escapes
1714							# in Chapter 11: perlre: Perl regular expressions
1715							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1716
1717							# P.210 POSIX-Style Character Classes
1718							# in Chapter 5: Pattern Matching
1719							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1720
1721							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
1722
1723							'[:upper:]' => '[\x41-\x5A]',
1724							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
1725							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
1726							'[:^alnum:]' => '${Char::Ekoi8u::not_alnum}',
1727							'[:^alpha:]' => '${Char::Ekoi8u::not_alpha}',
1728							'[:^ascii:]' => '${Char::Ekoi8u::not_ascii}',
1729							'[:^blank:]' => '${Char::Ekoi8u::not_blank}',
1730							'[:^cntrl:]' => '${Char::Ekoi8u::not_cntrl}',
1731							'[:^digit:]' => '${Char::Ekoi8u::not_digit}',
1732							'[:^graph:]' => '${Char::Ekoi8u::not_graph}',
1733							'[:^lower:]' => '${Char::Ekoi8u::not_lower}',
1734							'[:^print:]' => '${Char::Ekoi8u::not_print}',
1735							'[:^punct:]' => '${Char::Ekoi8u::not_punct}',
1736							'[:^space:]' => '${Char::Ekoi8u::not_space}',
1737							'[:^upper:]' => '${Char::Ekoi8u::not_upper}',
1738							'[:^word:]' => '${Char::Ekoi8u::not_word}',
1739							'[:^xdigit:]' => '${Char::Ekoi8u::not_xdigit}',
1740
1741							}->{$1};
1742							}
1743							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
1744	0					0	$char[$i] = $1;
1745							}
1746							}
1747
1748							# open character list
1749	0					0	my @singleoctet = ();
1750	0					0	my @multipleoctet = ();
1751	0					0	for (my $i=0; $i <= $#char; ) {
1752
1753							# escaped -
1754	0	0	0			0	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		0
		0
		0
		0
		0
1755	0					0	$i += 1;
1756	0					0	next;
1757							}
1758
1759							# make range regexp
1760							elsif ($char[$i] eq '...') {
1761
1762							# range error
1763	0	0				0	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		0
1764	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1765							}
1766							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
1767	0	0				0	if ($char[$i-1] gt $char[$i+1]) {
1768	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1769							}
1770							}
1771
1772							# make range regexp per length
1773	0					0	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
1774	0					0	my @regexp = ();
1775
1776							# is first and last
1777	0	0	0			0	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		0	0
		0
		0
1778	0					0	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
1779							}
1780
1781							# is first
1782							elsif ($length == CORE::length($char[$i-1])) {
1783	0					0	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
1784							}
1785
1786							# is inside in first and last
1787							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
1788	0					0	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
1789							}
1790
1791							# is last
1792							elsif ($length == CORE::length($char[$i+1])) {
1793	0					0	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
1794							}
1795
1796							else {
1797	0					0	die __FILE__, ": subroutine make_regexp panic.";
1798							}
1799
1800	0	0				0	if ($length == 1) {
1801	0					0	push @singleoctet, @regexp;
1802							}
1803							else {
1804	0					0	push @multipleoctet, @regexp;
1805							}
1806							}
1807
1808	0					0	$i += 2;
1809							}
1810
1811							# with /i modifier
1812							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1813	0	0				0	if ($modifier =~ /i/oxms) {
1814	0					0	my $uc = Char::Ekoi8u::uc($char[$i]);
1815	0					0	my $fc = Char::Ekoi8u::fc($char[$i]);
1816	0	0				0	if ($uc ne $fc) {
1817	0	0				0	if (CORE::length($fc) == 1) {
1818	0					0	push @singleoctet, $uc, $fc;
1819							}
1820							else {
1821	0					0	push @singleoctet, $uc;
1822	0					0	push @multipleoctet, $fc;
1823							}
1824							}
1825							else {
1826	0					0	push @singleoctet, $char[$i];
1827							}
1828							}
1829							else {
1830	0					0	push @singleoctet, $char[$i];
1831							}
1832	0					0	$i += 1;
1833							}
1834
1835							# single character of single octet code
1836							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
1837	0					0	push @singleoctet, "\t", "\x20";
1838	0					0	$i += 1;
1839							}
1840							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
1841	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
1842	0					0	$i += 1;
1843							}
1844							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
1845	0					0	push @singleoctet, $char[$i];
1846	0					0	$i += 1;
1847							}
1848
1849							# single character of multiple-octet code
1850							else {
1851	0					0	push @multipleoctet, $char[$i];
1852	0					0	$i += 1;
1853							}
1854							}
1855
1856							# quote metachar
1857	0					0	for (@singleoctet) {
1858	0	0				0	if ($_ eq '...') {
		0
		0
		0
		0
1859	0					0	$_ = '-';
1860							}
1861							elsif (/\A \n \z/oxms) {
1862	0					0	$_ = '\n';
1863							}
1864							elsif (/\A \r \z/oxms) {
1865	0					0	$_ = '\r';
1866							}
1867							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
1868	0					0	$_ = sprintf('\x%02X', CORE::ord $1);
1869							}
1870							elsif (/\A [\x00-\xFF] \z/oxms) {
1871	0					0	$_ = quotemeta $_;
1872							}
1873							}
1874
1875							# return character list
1876	0					0	return \@singleoctet, \@multipleoctet;
1877							}
1878
1879							#
1880							# KOI8-U octal escape sequence
1881							#
1882							sub octchr {
1883	0			0	0	0	my($octdigit) = @_;
1884
1885	0					0	my @binary = ();
1886	0					0	for my $octal (split(//,$octdigit)) {
1887	0					0	push @binary, {
1888							'0' => '000',
1889							'1' => '001',
1890							'2' => '010',
1891							'3' => '011',
1892							'4' => '100',
1893							'5' => '101',
1894							'6' => '110',
1895							'7' => '111',
1896							}->{$octal};
1897							}
1898	0					0	my $binary = join '', @binary;
1899
1900	0					0	my $octchr = {
1901							# 1234567
1902							1 => pack('B*', "0000000$binary"),
1903							2 => pack('B*', "000000$binary"),
1904							3 => pack('B*', "00000$binary"),
1905							4 => pack('B*', "0000$binary"),
1906							5 => pack('B*', "000$binary"),
1907							6 => pack('B*', "00$binary"),
1908							7 => pack('B*', "0$binary"),
1909							0 => pack('B*', "$binary"),
1910
1911							}->{CORE::length($binary) % 8};
1912
1913	0					0	return $octchr;
1914							}
1915
1916							#
1917							# KOI8-U hexadecimal escape sequence
1918							#
1919							sub hexchr {
1920	0			0	0	0	my($hexdigit) = @_;
1921
1922	0					0	my $hexchr = {
1923							1 => pack('H*', "0$hexdigit"),
1924							0 => pack('H*', "$hexdigit"),
1925
1926							}->{CORE::length($_[0]) % 2};
1927
1928	0					0	return $hexchr;
1929							}
1930
1931							#
1932							# KOI8-U open character list for qr
1933							#
1934							sub charlist_qr {
1935
1936	0			0	0	0	my $modifier = pop @_;
1937	0					0	my @char = @_;
1938
1939	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1940	0					0	my @singleoctet = @$singleoctet;
1941	0					0	my @multipleoctet = @$multipleoctet;
1942
1943							# return character list
1944	0	0				0	if (scalar(@singleoctet) >= 1) {
1945
1946							# with /i modifier
1947	0	0				0	if ($modifier =~ m/i/oxms) {
1948	0					0	my %singleoctet_ignorecase = ();
1949	0					0	for (@singleoctet) {
1950	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
1951	0					0	for my $ord (hex($1) .. hex($2)) {
1952	0					0	my $char = CORE::chr($ord);
1953	0					0	my $uc = Char::Ekoi8u::uc($char);
1954	0					0	my $fc = Char::Ekoi8u::fc($char);
1955	0	0				0	if ($uc eq $fc) {
1956	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
1957							}
1958							else {
1959	0	0				0	if (CORE::length($fc) == 1) {
1960	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1961	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
1962							}
1963							else {
1964	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1965	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
1966							}
1967							}
1968							}
1969							}
1970	0	0				0	if ($_ ne '') {
1971	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
1972							}
1973							}
1974	0					0	my $i = 0;
1975	0					0	my @singleoctet_ignorecase = ();
1976	0					0	for my $ord (0 .. 255) {
1977	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
1978	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
1979							}
1980							else {
1981	0					0	$i++;
1982							}
1983							}
1984	0					0	@singleoctet = ();
1985	0					0	for my $range (@singleoctet_ignorecase) {
1986	0	0				0	if (ref $range) {
1987	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
1988	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
1989							}
1990							elsif (scalar(@{$range}) == 2) {
1991	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1992							}
1993							else {
1994	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1995							}
1996							}
1997							}
1998							}
1999
2000	0					0	my $not_anchor = '';
2001
2002	0					0	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
2003							}
2004	0	0				0	if (scalar(@multipleoctet) >= 2) {
2005	0					0	return '(?:' . join('\|', @multipleoctet) . ')';
2006							}
2007							else {
2008	0					0	return $multipleoctet[0];
2009							}
2010							}
2011
2012							#
2013							# KOI8-U open character list for not qr
2014							#
2015							sub charlist_not_qr {
2016
2017	0			0	0	0	my $modifier = pop @_;
2018	0					0	my @char = @_;
2019
2020	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
2021	0					0	my @singleoctet = @$singleoctet;
2022	0					0	my @multipleoctet = @$multipleoctet;
2023
2024							# with /i modifier
2025	0	0				0	if ($modifier =~ m/i/oxms) {
2026	0					0	my %singleoctet_ignorecase = ();
2027	0					0	for (@singleoctet) {
2028	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
2029	0					0	for my $ord (hex($1) .. hex($2)) {
2030	0					0	my $char = CORE::chr($ord);
2031	0					0	my $uc = Char::Ekoi8u::uc($char);
2032	0					0	my $fc = Char::Ekoi8u::fc($char);
2033	0	0				0	if ($uc eq $fc) {
2034	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
2035							}
2036							else {
2037	0	0				0	if (CORE::length($fc) == 1) {
2038	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2039	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
2040							}
2041							else {
2042	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2043	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
2044							}
2045							}
2046							}
2047							}
2048	0	0				0	if ($_ ne '') {
2049	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
2050							}
2051							}
2052	0					0	my $i = 0;
2053	0					0	my @singleoctet_ignorecase = ();
2054	0					0	for my $ord (0 .. 255) {
2055	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
2056	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
2057							}
2058							else {
2059	0					0	$i++;
2060							}
2061							}
2062	0					0	@singleoctet = ();
2063	0					0	for my $range (@singleoctet_ignorecase) {
2064	0	0				0	if (ref $range) {
2065	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
2066	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
2067							}
2068							elsif (scalar(@{$range}) == 2) {
2069	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2070							}
2071							else {
2072	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2073							}
2074							}
2075							}
2076							}
2077
2078							# return character list
2079	0	0				0	if (scalar(@multipleoctet) >= 1) {
2080	0	0				0	if (scalar(@singleoctet) >= 1) {
2081
2082							# any character other than multiple-octet and single octet character class
2083	0					0	return '(?!' . join('\|', @multipleoctet) . ')(?:[^' . join('', @singleoctet) . '])';
2084							}
2085							else {
2086
2087							# any character other than multiple-octet character class
2088	0					0	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
2089							}
2090							}
2091							else {
2092	0	0				0	if (scalar(@singleoctet) >= 1) {
2093
2094							# any character other than single octet character class
2095	0					0	return '(?:[^' . join('', @singleoctet) . '])';
2096							}
2097							else {
2098
2099							# any character
2100	0					0	return "(?:$your_char)";
2101							}
2102							}
2103							}
2104
2105							#
2106							# open file in read mode
2107							#
2108							sub _open_r {
2109	177			177		630	my(undef,$file) = @_;
2110	177					1313	$file =~ s#\A (\s) #./$1#oxms;
2111	177		33			20115	return eval(q{open($_[0],'<',$_[1])}) \|\|
2112							open($_[0],"< $file\0");
2113							}
2114
2115							#
2116							# open file in write mode
2117							#
2118							sub _open_w {
2119	0			0		0	my(undef,$file) = @_;
2120	0					0	$file =~ s#\A (\s) #./$1#oxms;
2121	0		0			0	return eval(q{open($_[0],'>',$_[1])}) \|\|
2122							open($_[0],"> $file\0");
2123							}
2124
2125							#
2126							# open file in append mode
2127							#
2128							sub _open_a {
2129	0			0		0	my(undef,$file) = @_;
2130	0					0	$file =~ s#\A (\s) #./$1#oxms;
2131	0		0			0	return eval(q{open($_[0],'>>',$_[1])}) \|\|
2132							open($_[0],">> $file\0");
2133							}
2134
2135							#
2136							# safe system
2137							#
2138							sub _systemx {
2139
2140							# P.707 29.2.33. exec
2141							# in Chapter 29: Functions
2142							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2143							#
2144							# Be aware that in older releases of Perl, exec (and system) did not flush
2145							# your output buffer, so you needed to enable command buffering by setting $\|
2146							# on one or more filehandles to avoid lost output in the case of exec, or
2147							# misordererd output in the case of system. This situation was largely remedied
2148							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
2149
2150							# P.855 exec
2151							# in Chapter 27: Functions
2152							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2153							#
2154							# In very old release of Perl (before v5.6), exec (and system) did not flush
2155							# your output buffer, so you needed to enable command buffering by setting $\|
2156							# on one or more filehandles to avoid lost output with exec or misordered
2157							# output with system.
2158
2159	177			177		767	$\| = 1;
2160
2161							# P.565 23.1.2. Cleaning Up Your Environment
2162							# in Chapter 23: Security
2163							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2164
2165							# P.656 Cleaning Up Your Environment
2166							# in Chapter 20: Security
2167							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2168
2169							# local $ENV{'PATH'} = '.';
2170	177					1946	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
2171
2172							# P.707 29.2.33. exec
2173							# in Chapter 29: Functions
2174							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2175							#
2176							# As we mentioned earlier, exec treats a discrete list of arguments as an
2177							# indication that it should bypass shell processing. However, there is one
2178							# place where you might still get tripped up. The exec call (and system, too)
2179							# will not distinguish between a single scalar argument and an array containing
2180							# only one element.
2181							#
2182							# @args = ("echo surprise"); # just one element in list
2183							# exec @args # still subject to shell escapes
2184							# or die "exec: $!"; # because @args == 1
2185							#
2186							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
2187							# first argument as the pathname, which forces the rest of the arguments to be
2188							# interpreted as a list, even if there is only one of them:
2189							#
2190							# exec { $args[0] } @args # safe even with one-argument list
2191							# or die "can't exec @args: $!";
2192
2193							# P.855 exec
2194							# in Chapter 27: Functions
2195							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2196							#
2197							# As we mentioned earlier, exec treats a discrete list of arguments as a
2198							# directive to bypass shell processing. However, there is one place where
2199							# you might still get tripped up. The exec call (and system, too) cannot
2200							# distinguish between a single scalar argument and an array containing
2201							# only one element.
2202							#
2203							# @args = ("echo surprise"); # just one element in list
2204							# exec @args # still subject to shell escapes
2205							# \|\| die "exec: $!"; # because @args == 1
2206							#
2207							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
2208							# argument as the pathname, which forces the rest of the arguments to be
2209							# interpreted as a list, even if there is only one of them:
2210							#
2211							# exec { $args[0] } @args # safe even with one-argument list
2212							# \|\| die "can't exec @args: $!";
2213
2214	177					466	return CORE::system { $_[0] } @_; # safe even with one-argument list
	177					14768705
2215							}
2216
2217							#
2218							# KOI8-U order to character (with parameter)
2219							#
2220							sub Char::Ekoi8u::chr(;$) {
2221
2222	0	0		0	0		my $c = @_ ? $_[0] : $_;
2223
2224	0	0					if ($c == 0x00) {
2225	0						return "\x00";
2226							}
2227							else {
2228	0						my @chr = ();
2229	0						while ($c > 0) {
2230	0						unshift @chr, ($c % 0x100);
2231	0						$c = int($c / 0x100);
2232							}
2233	0						return pack 'C*', @chr;
2234							}
2235							}
2236
2237							#
2238							# KOI8-U order to character (without parameter)
2239							#
2240							sub Char::Ekoi8u::chr_() {
2241
2242	0			0	0		my $c = $_;
2243
2244	0	0					if ($c == 0x00) {
2245	0						return "\x00";
2246							}
2247							else {
2248	0						my @chr = ();
2249	0						while ($c > 0) {
2250	0						unshift @chr, ($c % 0x100);
2251	0						$c = int($c / 0x100);
2252							}
2253	0						return pack 'C*', @chr;
2254							}
2255							}
2256
2257							#
2258							# KOI8-U path globbing (with parameter)
2259							#
2260							sub Char::Ekoi8u::glob($) {
2261
2262	0	0		0	0		if (wantarray) {
2263	0						my @glob = _DOS_like_glob(@_);
2264	0						for my $glob (@glob) {
2265	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2266							}
2267	0						return @glob;
2268							}
2269							else {
2270	0						my $glob = _DOS_like_glob(@_);
2271	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2272	0						return $glob;
2273							}
2274							}
2275
2276							#
2277							# KOI8-U path globbing (without parameter)
2278							#
2279							sub Char::Ekoi8u::glob_() {
2280
2281	0	0		0	0		if (wantarray) {
2282	0						my @glob = _DOS_like_glob();
2283	0						for my $glob (@glob) {
2284	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2285							}
2286	0						return @glob;
2287							}
2288							else {
2289	0						my $glob = _DOS_like_glob();
2290	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2291	0						return $glob;
2292							}
2293							}
2294
2295							#
2296							# KOI8-U path globbing via File::DosGlob 1.10
2297							#
2298							# Often I confuse "_dosglob" and "_doglob".
2299							# So, I renamed "_dosglob" to "_DOS_like_glob".
2300							#
2301							my %iter;
2302							my %entries;
2303							sub _DOS_like_glob {
2304
2305							# context (keyed by second cxix argument provided by core)
2306	0			0			my($expr,$cxix) = @_;
2307
2308							# glob without args defaults to $_
2309	0	0					$expr = $_ if not defined $expr;
2310
2311							# represents the current user's home directory
2312							#
2313							# 7.3. Expanding Tildes in Filenames
2314							# in Chapter 7. File Access
2315							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2316							#
2317							# and File::HomeDir, File::HomeDir::Windows module
2318
2319							# DOS-like system
2320	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2321	0						$expr =~ s{ \A ~ (?= [^/\\] ) }
2322	0						{ my_home_MSWin32() }oxmse;
2323							}
2324
2325							# UNIX-like system
2326							else {
2327	0						$expr =~ s{ \A ~ ( (?:[^/])* ) }
2328	0	0	0				{ $1 ? (eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
2329							}
2330
2331							# assume global context if not provided one
2332	0	0					$cxix = '_G_' if not defined $cxix;
2333	0	0					$iter{$cxix} = 0 if not exists $iter{$cxix};
2334
2335							# if we're just beginning, do it all first
2336	0	0					if ($iter{$cxix} == 0) {
2337	0						$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
2338							}
2339
2340							# chuck it all out, quick or slow
2341	0	0					if (wantarray) {
2342	0						delete $iter{$cxix};
2343	0						return @{delete $entries{$cxix}};
	0
2344							}
2345							else {
2346	0	0					if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0
2347	0						return shift @{$entries{$cxix}};
	0
2348							}
2349							else {
2350							# return undef for EOL
2351	0						delete $iter{$cxix};
2352	0						delete $entries{$cxix};
2353	0						return undef;
2354							}
2355							}
2356							}
2357
2358							#
2359							# KOI8-U path globbing subroutine
2360							#
2361							sub _do_glob {
2362
2363	0			0			my($cond,@expr) = @_;
2364	0						my @glob = ();
2365	0						my $fix_drive_relative_paths = 0;
2366
2367							OUTER:
2368	0						for my $expr (@expr) {
2369	0	0					next OUTER if not defined $expr;
2370	0	0					next OUTER if $expr eq '';
2371
2372	0						my @matched = ();
2373	0						my @globdir = ();
2374	0						my $head = '.';
2375	0						my $pathsep = '/';
2376	0						my $tail;
2377
2378							# if argument is within quotes strip em and do no globbing
2379	0	0					if ($expr =~ /\A " ((?:$q_char)*) " \z/oxms) {
2380	0						$expr = $1;
2381	0	0					if ($cond eq 'd') {
2382	0	0					if (-d $expr) {
2383	0						push @glob, $expr;
2384							}
2385							}
2386							else {
2387	0	0					if (-e $expr) {
2388	0						push @glob, $expr;
2389							}
2390							}
2391	0						next OUTER;
2392							}
2393
2394							# wildcards with a drive prefix such as h:*.pm must be changed
2395							# to h:./*.pm to expand correctly
2396	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2397	0	0					if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
2398	0						$fix_drive_relative_paths = 1;
2399							}
2400							}
2401
2402	0	0					if (($head, $tail) = _parse_path($expr,$pathsep)) {
2403	0	0					if ($tail eq '') {
2404	0						push @glob, $expr;
2405	0						next OUTER;
2406							}
2407	0	0					if ($head =~ / \A (?:$q_char)? [?] /oxms) {
2408	0	0					if (@globdir = _do_glob('d', $head)) {
2409	0						push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0
2410	0						next OUTER;
2411							}
2412							}
2413	0	0	0				if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
2414	0						$head .= $pathsep;
2415							}
2416	0						$expr = $tail;
2417							}
2418
2419							# If file component has no wildcards, we can avoid opendir
2420	0	0					if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
2421	0	0					if ($head eq '.') {
2422	0						$head = '';
2423							}
2424	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2425	0						$head .= $pathsep;
2426							}
2427	0						$head .= $expr;
2428	0	0					if ($cond eq 'd') {
2429	0	0					if (-d $head) {
2430	0						push @glob, $head;
2431							}
2432							}
2433							else {
2434	0	0					if (-e $head) {
2435	0						push @glob, $head;
2436							}
2437							}
2438	0						next OUTER;
2439							}
2440	0	0					opendir(*DIR, $head) or next OUTER;
2441	0						my @leaf = readdir DIR;
2442	0						closedir DIR;
2443
2444	0	0					if ($head eq '.') {
2445	0						$head = '';
2446							}
2447	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2448	0						$head .= $pathsep;
2449							}
2450
2451	0						my $pattern = '';
2452	0						while ($expr =~ / \G ($q_char) /oxgc) {
2453	0						my $char = $1;
2454
2455							# 6.9. Matching Shell Globs as Regular Expressions
2456							# in Chapter 6. Pattern Matching
2457							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2458							# (and so on)
2459
2460	0	0					if ($char eq '*') {
		0
		0
2461	0						$pattern .= "(?:$your_char)*",
2462							}
2463							elsif ($char eq '?') {
2464	0						$pattern .= "(?:$your_char)?", # DOS style
2465							# $pattern .= "(?:$your_char)", # UNIX style
2466							}
2467							elsif ((my $fc = Char::Ekoi8u::fc($char)) ne $char) {
2468	0						$pattern .= $fc;
2469							}
2470							else {
2471	0						$pattern .= quotemeta $char;
2472							}
2473							}
2474	0			0			my $matchsub = sub { Char::Ekoi8u::fc($_[0]) =~ /\A $pattern \z/xms };
	0
2475
2476							# if ($@) {
2477							# print STDERR "$0: $@\n";
2478							# next OUTER;
2479							# }
2480
2481							INNER:
2482	0						for my $leaf (@leaf) {
2483	0	0	0				if ($leaf eq '.' or $leaf eq '..') {
2484	0						next INNER;
2485							}
2486	0	0	0				if ($cond eq 'd' and not -d "$head$leaf") {
2487	0						next INNER;
2488							}
2489
2490	0	0					if (&$matchsub($leaf)) {
2491	0						push @matched, "$head$leaf";
2492	0						next INNER;
2493							}
2494
2495							# [DOS compatibility special case]
2496							# Failed, add a trailing dot and try again, but only...
2497
2498	0	0	0				if (Char::Ekoi8u::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
2499							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
2500							Char::Ekoi8u::index($pattern,'\\.') != -1 # pattern has a dot.
2501							) {
2502	0	0					if (&$matchsub("$leaf.")) {
2503	0						push @matched, "$head$leaf";
2504	0						next INNER;
2505							}
2506							}
2507							}
2508	0	0					if (@matched) {
2509	0						push @glob, @matched;
2510							}
2511							}
2512	0	0					if ($fix_drive_relative_paths) {
2513	0						for my $glob (@glob) {
2514	0						$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
2515							}
2516							}
2517	0						return @glob;
2518							}
2519
2520							#
2521							# KOI8-U parse line
2522							#
2523							sub _parse_line {
2524
2525	0			0			my($line) = @_;
2526
2527	0						$line .= ' ';
2528	0						my @piece = ();
2529	0						while ($line =~ /
2530							" ( (?: [^"] )* ) " \s+ \|
2531							( (?: [^"\s] )* ) \s+
2532							/oxmsg
2533							) {
2534	0	0					push @piece, defined($1) ? $1 : $2;
2535							}
2536	0						return @piece;
2537							}
2538
2539							#
2540							# KOI8-U parse path
2541							#
2542							sub _parse_path {
2543
2544	0			0			my($path,$pathsep) = @_;
2545
2546	0						$path .= '/';
2547	0						my @subpath = ();
2548	0						while ($path =~ /
2549							((?: [^\/\\] )+?) [\/\\]
2550							/oxmsg
2551							) {
2552	0						push @subpath, $1;
2553							}
2554
2555	0						my $tail = pop @subpath;
2556	0						my $head = join $pathsep, @subpath;
2557	0						return $head, $tail;
2558							}
2559
2560							#
2561							# via File::HomeDir::Windows 1.00
2562							#
2563							sub my_home_MSWin32 {
2564
2565							# A lot of unix people and unix-derived tools rely on
2566							# the ability to overload HOME. We will support it too
2567							# so that they can replace raw HOME calls with File::HomeDir.
2568	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
2569	0						return $ENV{'HOME'};
2570							}
2571
2572							# Do we have a user profile?
2573							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
2574	0						return $ENV{'USERPROFILE'};
2575							}
2576
2577							# Some Windows use something like $ENV{'HOME'}
2578							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
2579	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
2580							}
2581
2582	0						return undef;
2583							}
2584
2585							#
2586							# via File::HomeDir::Unix 1.00
2587							#
2588							sub my_home {
2589	0			0	0		my $home;
2590
2591	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
2592	0						$home = $ENV{'HOME'};
2593							}
2594
2595							# This is from the original code, but I'm guessing
2596							# it means "login directory" and exists on some Unixes.
2597							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
2598	0						$home = $ENV{'LOGDIR'};
2599							}
2600
2601							### More-desperate methods
2602
2603							# Light desperation on any (Unixish) platform
2604							else {
2605	0						$home = eval q{ (getpwuid($<))[7] };
2606							}
2607
2608							# On Unix in general, a non-existant home means "no home"
2609							# For example, "nobody"-like users might use /nonexistant
2610	0	0	0				if (defined $home and ! -d($home)) {
2611	0						$home = undef;
2612							}
2613	0						return $home;
2614							}
2615
2616							#
2617							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
2618							#
2619							sub Char::Ekoi8u::PREMATCH {
2620	0			0	0		return $`;
2621							}
2622
2623							#
2624							# ${^MATCH}, $MATCH, $& the string that matched
2625							#
2626							sub Char::Ekoi8u::MATCH {
2627	0			0	0		return $&;
2628							}
2629
2630							#
2631							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
2632							#
2633							sub Char::Ekoi8u::POSTMATCH {
2634	0			0	0		return $';
2635							}
2636
2637							#
2638							# KOI8-U character to order (with parameter)
2639							#
2640							sub Char::KOI8U::ord(;$) {
2641
2642	0	0		0	1		local $_ = shift if @_;
2643
2644	0	0					if (/\A ($q_char) /oxms) {
2645	0						my @ord = unpack 'C*', $1;
2646	0						my $ord = 0;
2647	0						while (my $o = shift @ord) {
2648	0						$ord = $ord * 0x100 + $o;
2649							}
2650	0						return $ord;
2651							}
2652							else {
2653	0						return CORE::ord $_;
2654							}
2655							}
2656
2657							#
2658							# KOI8-U character to order (without parameter)
2659							#
2660							sub Char::KOI8U::ord_() {
2661
2662	0	0		0	0		if (/\A ($q_char) /oxms) {
2663	0						my @ord = unpack 'C*', $1;
2664	0						my $ord = 0;
2665	0						while (my $o = shift @ord) {
2666	0						$ord = $ord * 0x100 + $o;
2667							}
2668	0						return $ord;
2669							}
2670							else {
2671	0						return CORE::ord $_;
2672							}
2673							}
2674
2675							#
2676							# KOI8-U reverse
2677							#
2678							sub Char::KOI8U::reverse(@) {
2679
2680	0	0		0	0		if (wantarray) {
2681	0						return CORE::reverse @_;
2682							}
2683							else {
2684
2685							# One of us once cornered Larry in an elevator and asked him what
2686							# problem he was solving with this, but he looked as far off into
2687							# the distance as he could in an elevator and said, "It seemed like
2688							# a good idea at the time."
2689
2690	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
2691							}
2692							}
2693
2694							#
2695							# KOI8-U getc (with parameter, without parameter)
2696							#
2697							sub Char::KOI8U::getc(;*@) {
2698
2699	0			0	0		my($package) = caller;
2700	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
2701	0	0	0				croak 'Too many arguments for Char::KOI8U::getc' if @_ and not wantarray;
2702
2703	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
2704	0						my $getc = '';
2705	0						for my $length ($length[0] .. $length[-1]) {
2706	0						$getc .= CORE::getc($fh);
2707	0	0					if (exists $range_tr{CORE::length($getc)}) {
2708	0	0					if ($getc =~ /\A ${Char::Ekoi8u::dot_s} \z/oxms) {
2709	0	0					return wantarray ? ($getc,@_) : $getc;
2710							}
2711							}
2712							}
2713	0	0					return wantarray ? ($getc,@_) : $getc;
2714							}
2715
2716							#
2717							# KOI8-U length by character
2718							#
2719							sub Char::KOI8U::length(;$) {
2720
2721	0	0		0	1		local $_ = shift if @_;
2722
2723	0						local @_ = /\G ($q_char) /oxmsg;
2724	0						return scalar @_;
2725							}
2726
2727							#
2728							# KOI8-U substr by character
2729							#
2730							BEGIN {
2731
2732							# P.232 The lvalue Attribute
2733							# in Chapter 6: Subroutines
2734							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2735
2736							# P.336 The lvalue Attribute
2737							# in Chapter 7: Subroutines
2738							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2739
2740							# P.144 8.4 Lvalue subroutines
2741							# in Chapter 8: perlsub: Perl subroutines
2742							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2743
2744	177	50	0	177	1	414813	eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
2745							# vv----------------*******
2746							sub Char::KOI8U::substr($$;$$) %s {
2747
2748							my @char = $_[0] =~ /\G ($q_char) /oxmsg;
2749
2750							# If the substring is beyond either end of the string, substr() returns the undefined
2751							# value and produces a warning. When used as an lvalue, specifying a substring that
2752							# is entirely outside the string raises an exception.
2753							# http://perldoc.perl.org/functions/substr.html
2754
2755							# A return with no argument returns the scalar value undef in scalar context,
2756							# an empty list () in list context, and (naturally) nothing at all in void
2757							# context.
2758
2759							my $offset = $_[1];
2760							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
2761							return;
2762							}
2763
2764							# substr($string,$offset,$length,$replacement)
2765							if (@_ == 4) {
2766							my(undef,undef,$length,$replacement) = @_;
2767							my $substr = join '', splice(@char, $offset, $length, $replacement);
2768							$_[0] = join '', @char;
2769
2770							# return $substr; this doesn't work, don't say "return"
2771							$substr;
2772							}
2773
2774							# substr($string,$offset,$length)
2775							elsif (@_ == 3) {
2776							my(undef,undef,$length) = @_;
2777							my $octet_offset = 0;
2778							my $octet_length = 0;
2779							if ($offset == 0) {
2780							$octet_offset = 0;
2781							}
2782							elsif ($offset > 0) {
2783							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2784							}
2785							else {
2786							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2787							}
2788							if ($length == 0) {
2789							$octet_length = 0;
2790							}
2791							elsif ($length > 0) {
2792							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
2793							}
2794							else {
2795							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
2796							}
2797							CORE::substr($_[0], $octet_offset, $octet_length);
2798							}
2799
2800							# substr($string,$offset)
2801							else {
2802							my $octet_offset = 0;
2803							if ($offset == 0) {
2804							$octet_offset = 0;
2805							}
2806							elsif ($offset > 0) {
2807							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2808							}
2809							else {
2810							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2811							}
2812							CORE::substr($_[0], $octet_offset);
2813							}
2814							}
2815							END
2816							}
2817
2818							#
2819							# KOI8-U index by character
2820							#
2821							sub Char::KOI8U::index($$;$) {
2822
2823	0			0	1		my $index;
2824	0	0					if (@_ == 3) {
2825	0						$index = Char::Ekoi8u::index($_[0], $_[1], CORE::length(Char::KOI8U::substr($_[0], 0, $_[2])));
2826							}
2827							else {
2828	0						$index = Char::Ekoi8u::index($_[0], $_[1]);
2829							}
2830
2831	0	0					if ($index == -1) {
2832	0						return -1;
2833							}
2834							else {
2835	0						return Char::KOI8U::length(CORE::substr $_[0], 0, $index);
2836							}
2837							}
2838
2839							#
2840							# KOI8-U rindex by character
2841							#
2842							sub Char::KOI8U::rindex($$;$) {
2843
2844	0			0	1		my $rindex;
2845	0	0					if (@_ == 3) {
2846	0						$rindex = Char::Ekoi8u::rindex($_[0], $_[1], CORE::length(Char::KOI8U::substr($_[0], 0, $_[2])));
2847							}
2848							else {
2849	0						$rindex = Char::Ekoi8u::rindex($_[0], $_[1]);
2850							}
2851
2852	0	0					if ($rindex == -1) {
2853	0						return -1;
2854							}
2855							else {
2856	0						return Char::KOI8U::length(CORE::substr $_[0], 0, $rindex);
2857							}
2858							}
2859
2860							#
2861							# instead of Carp::carp
2862							#
2863							sub carp {
2864	0			0	0		my($package,$filename,$line) = caller(1);
2865	0						print STDERR "@_ at $filename line $line.\n";
2866							}
2867
2868							#
2869							# instead of Carp::croak
2870							#
2871							sub croak {
2872	0			0	0		my($package,$filename,$line) = caller(1);
2873	0						print STDERR "@_ at $filename line $line.\n";
2874	0						die "\n";
2875							}
2876
2877							#
2878							# instead of Carp::cluck
2879							#
2880							sub cluck {
2881	0			0	0		my $i = 0;
2882	0						my @cluck = ();
2883	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2884	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
2885	0						$i++;
2886							}
2887	0						print STDERR CORE::reverse @cluck;
2888	0						print STDERR "\n";
2889	0						carp @_;
2890							}
2891
2892							#
2893							# instead of Carp::confess
2894							#
2895							sub confess {
2896	0			0	0		my $i = 0;
2897	0						my @confess = ();
2898	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2899	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
2900	0						$i++;
2901							}
2902	0						print STDERR CORE::reverse @confess;
2903	0						print STDERR "\n";
2904	0						croak @_;
2905							}
2906
2907							1;
2908
2909							__END__