File Coverage

Char/Ekoi8r.pm

Criterion	Covered	Total	%
statement	51	954	5.3
branch	4	562	0.7
condition	1	180	0.5
subroutine	20	85	23.5
pod	7	50	14.0
total	83	1831	4.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# This file is * FOR CPAN USE ONLY *.
3							#
4							package Char::Ekoi8r;
5							######################################################################
6							#
7							# Char::Ekoi8r - Run-time routines for Char/KOI8R.pm
8							#
9							# http://search.cpan.org/dist/Char-KOI8R/
10							#
11							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 INABA Hitoshi
12							######################################################################
13
14	177			177		4158	use 5.00503; # Galapagos Consensus 1998 for primetools
	177					949
	177					8725
15							# use 5.008001; # Lancaster Consensus 2013 for toolchains
16
17							# 12.3. Delaying use Until Runtime
18							# in Chapter 12. Packages, Libraries, and Modules
19							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
20							# (and so on)
21
22	177			177		11029	BEGIN { eval q{ use vars qw($VERSION) } }
	177			177		1012
	177					377
	177					37540
23							$VERSION = sprintf '%d.%02d', q$Revision: 1.01 $ =~ /(\d+)/xmsg;
24
25							BEGIN {
26	177	50		177		1503	if ($^X =~ / jperl /oxmsi) {
27	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)";
28							}
29	177					292	if (CORE::ord('A') == 193) {
30							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).";
31							}
32	177					31498	if (CORE::ord('A') != 0x41) {
33							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).";
34							}
35							}
36
37							BEGIN {
38
39							# instead of utf8.pm
40	177			177		11373	eval q{
	177			177		1183
	177			74		329
	177					28511
	74					12673
	52					9505
	56					11318
	61					10994
	61					11862
	50					11782
41							no warnings qw(redefine);
42							*utf8::upgrade = sub { CORE::length $_[0] };
43							*utf8::downgrade = sub { 1 };
44							*utf8::encode = sub { };
45							*utf8::decode = sub { 1 };
46							*utf8::is_utf8 = sub { };
47							*utf8::valid = sub { 1 };
48							};
49	177	50				125798	if ($@) {
50	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
51	0					0	*utf8::downgrade = sub { 1 };
	0					0
52	0					0	*utf8::encode = sub { };
	0					0
53	0					0	*utf8::decode = sub { 1 };
	0					0
54	0					0	*utf8::is_utf8 = sub { };
	0					0
55	0					0	*utf8::valid = sub { 1 };
	0					0
56							}
57							}
58
59							# instead of Symbol.pm
60							BEGIN {
61	177			177		427	my $genpkg = "Symbol::";
62	177					10793	my $genseq = 0;
63
64							sub gensym () {
65	0			0	0	0	my $name = "GEN" . $genseq++;
66
67							# here, no strict qw(refs); if strict.pm exists
68
69	0					0	my $ref = \*{$genpkg . $name};
	0					0
70	0					0	delete $$genpkg{$name};
71	0					0	return $ref;
72							}
73
74							sub qualify ($;$) {
75	0			0	0	0	my ($name) = @_;
76	0	0	0			0	if (!ref($name) && (Char::Ekoi8r::index($name, '::') == -1) && (Char::Ekoi8r::index($name, "'") == -1)) {
			0
77	0					0	my $pkg;
78	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
79
80							# Global names: special character, "^xyz", or other.
81	0	0	0			0	if ($name =~ /^(([^a-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
82							# RGS 2001-11-05 : translate leading ^X to control-char
83	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
84	0					0	$pkg = "main";
85							}
86							else {
87	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
88							}
89	0					0	$name = $pkg . "::" . $name;
90							}
91	0					0	return $name;
92							}
93
94							sub qualify_to_ref ($;$) {
95
96							# here, no strict qw(refs); if strict.pm exists
97
98	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
99							}
100							}
101
102							# Column: local $@
103							# in Chapter 9. Osaete okitai Perl no kiso
104							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
105							# (and so on)
106
107							# use strict; if strict.pm exists
108							BEGIN {
109	177	50		177		513	if (eval { local $@; CORE::require strict }) {
	177					572
	177					2443
110	177					29989	strict::->import;
111							}
112							}
113
114							# P.714 29.2.39. flock
115							# in Chapter 29: Functions
116							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
117
118							# P.863 flock
119							# in Chapter 27: Functions
120							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
121
122							sub LOCK_SH() {1}
123							sub LOCK_EX() {2}
124							sub LOCK_UN() {8}
125							sub LOCK_NB() {4}
126
127							# instead of Carp.pm
128							sub carp;
129							sub croak;
130							sub cluck;
131							sub confess;
132
133							my $your_char = q{[\x00-\xFF]};
134
135							# regexp of character
136	177			177		21619	BEGIN { eval q{ use vars qw($q_char) } }
	177			177		980
	177					518
	177					14504
137							$q_char = qr/$your_char/oxms;
138
139							#
140							# KOI8-R character range per length
141							#
142							my %range_tr = ();
143
144							#
145							# alias of encoding name
146							#
147	177			177		10496	BEGIN { eval q{ use vars qw($encoding_alias) } }
	177			177		994
	177					436
	177					393771
148
149							#
150							# KOI8-R case conversion
151							#
152							my %lc = ();
153							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
154							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
155							my %uc = ();
156							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
157							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
158							my %fc = ();
159							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
160							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
161
162							if (0) {
163							}
164
165							elsif (__PACKAGE__ =~ / \b Ekoi8r \z/oxms) {
166							%range_tr = (
167							1 => [ [0x00..0xFF],
168							],
169							);
170							$encoding_alias = qr/ \b (?: koi8-?r ) \b /oxmsi;
171
172							%lc = (%lc,
173							"\xB3" => "\xA3", # CYRILLIC LETTER IO
174							"\xE0" => "\xC0", # CYRILLIC LETTER IU
175							"\xE1" => "\xC1", # CYRILLIC LETTER A
176							"\xE2" => "\xC2", # CYRILLIC LETTER BE
177							"\xE3" => "\xC3", # CYRILLIC LETTER TSE
178							"\xE4" => "\xC4", # CYRILLIC LETTER DE
179							"\xE5" => "\xC5", # CYRILLIC LETTER IE
180							"\xE6" => "\xC6", # CYRILLIC LETTER EF
181							"\xE7" => "\xC7", # CYRILLIC LETTER GE
182							"\xE8" => "\xC8", # CYRILLIC LETTER KHA
183							"\xE9" => "\xC9", # CYRILLIC LETTER II
184							"\xEA" => "\xCA", # CYRILLIC LETTER SHORT II
185							"\xEB" => "\xCB", # CYRILLIC LETTER KA
186							"\xEC" => "\xCC", # CYRILLIC LETTER EL
187							"\xED" => "\xCD", # CYRILLIC LETTER EM
188							"\xEE" => "\xCE", # CYRILLIC LETTER EN
189							"\xEF" => "\xCF", # CYRILLIC LETTER O
190							"\xF0" => "\xD0", # CYRILLIC LETTER PE
191							"\xF1" => "\xD1", # CYRILLIC LETTER IA
192							"\xF2" => "\xD2", # CYRILLIC LETTER ER
193							"\xF3" => "\xD3", # CYRILLIC LETTER ES
194							"\xF4" => "\xD4", # CYRILLIC LETTER TE
195							"\xF5" => "\xD5", # CYRILLIC LETTER U
196							"\xF6" => "\xD6", # CYRILLIC LETTER ZHE
197							"\xF7" => "\xD7", # CYRILLIC LETTER VE
198							"\xF8" => "\xD8", # CYRILLIC LETTER SOFT SIGN
199							"\xF9" => "\xD9", # CYRILLIC LETTER YERI
200							"\xFA" => "\xDA", # CYRILLIC LETTER ZE
201							"\xFB" => "\xDB", # CYRILLIC LETTER SHA
202							"\xFC" => "\xDC", # CYRILLIC LETTER REVERSED E
203							"\xFD" => "\xDD", # CYRILLIC LETTER SHCHA
204							"\xFE" => "\xDE", # CYRILLIC LETTER CHE
205							"\xFF" => "\xDF", # CYRILLIC LETTER HARD SIGN
206							);
207
208							%uc = (%uc,
209							"\xA3" => "\xB3", # CYRILLIC LETTER IO
210							"\xC0" => "\xE0", # CYRILLIC LETTER IU
211							"\xC1" => "\xE1", # CYRILLIC LETTER A
212							"\xC2" => "\xE2", # CYRILLIC LETTER BE
213							"\xC3" => "\xE3", # CYRILLIC LETTER TSE
214							"\xC4" => "\xE4", # CYRILLIC LETTER DE
215							"\xC5" => "\xE5", # CYRILLIC LETTER IE
216							"\xC6" => "\xE6", # CYRILLIC LETTER EF
217							"\xC7" => "\xE7", # CYRILLIC LETTER GE
218							"\xC8" => "\xE8", # CYRILLIC LETTER KHA
219							"\xC9" => "\xE9", # CYRILLIC LETTER II
220							"\xCA" => "\xEA", # CYRILLIC LETTER SHORT II
221							"\xCB" => "\xEB", # CYRILLIC LETTER KA
222							"\xCC" => "\xEC", # CYRILLIC LETTER EL
223							"\xCD" => "\xED", # CYRILLIC LETTER EM
224							"\xCE" => "\xEE", # CYRILLIC LETTER EN
225							"\xCF" => "\xEF", # CYRILLIC LETTER O
226							"\xD0" => "\xF0", # CYRILLIC LETTER PE
227							"\xD1" => "\xF1", # CYRILLIC LETTER IA
228							"\xD2" => "\xF2", # CYRILLIC LETTER ER
229							"\xD3" => "\xF3", # CYRILLIC LETTER ES
230							"\xD4" => "\xF4", # CYRILLIC LETTER TE
231							"\xD5" => "\xF5", # CYRILLIC LETTER U
232							"\xD6" => "\xF6", # CYRILLIC LETTER ZHE
233							"\xD7" => "\xF7", # CYRILLIC LETTER VE
234							"\xD8" => "\xF8", # CYRILLIC LETTER SOFT SIGN
235							"\xD9" => "\xF9", # CYRILLIC LETTER YERI
236							"\xDA" => "\xFA", # CYRILLIC LETTER ZE
237							"\xDB" => "\xFB", # CYRILLIC LETTER SHA
238							"\xDC" => "\xFC", # CYRILLIC LETTER REVERSED E
239							"\xDD" => "\xFD", # CYRILLIC LETTER SHCHA
240							"\xDE" => "\xFE", # CYRILLIC LETTER CHE
241							"\xDF" => "\xFF", # CYRILLIC LETTER HARD SIGN
242							);
243
244							%fc = (%fc,
245							"\xB3" => "\xA3", # CYRILLIC CAPITAL LETTER IO --> CYRILLIC SMALL LETTER IO
246							"\xE0" => "\xC0", # CYRILLIC CAPITAL LETTER YU --> CYRILLIC SMALL LETTER YU
247							"\xE1" => "\xC1", # CYRILLIC CAPITAL LETTER A --> CYRILLIC SMALL LETTER A
248							"\xE2" => "\xC2", # CYRILLIC CAPITAL LETTER BE --> CYRILLIC SMALL LETTER BE
249							"\xE3" => "\xC3", # CYRILLIC CAPITAL LETTER TSE --> CYRILLIC SMALL LETTER TSE
250							"\xE4" => "\xC4", # CYRILLIC CAPITAL LETTER DE --> CYRILLIC SMALL LETTER DE
251							"\xE5" => "\xC5", # CYRILLIC CAPITAL LETTER IE --> CYRILLIC SMALL LETTER IE
252							"\xE6" => "\xC6", # CYRILLIC CAPITAL LETTER EF --> CYRILLIC SMALL LETTER EF
253							"\xE7" => "\xC7", # CYRILLIC CAPITAL LETTER GHE --> CYRILLIC SMALL LETTER GHE
254							"\xE8" => "\xC8", # CYRILLIC CAPITAL LETTER HA --> CYRILLIC SMALL LETTER HA
255							"\xE9" => "\xC9", # CYRILLIC CAPITAL LETTER I --> CYRILLIC SMALL LETTER I
256							"\xEA" => "\xCA", # CYRILLIC CAPITAL LETTER SHORT I --> CYRILLIC SMALL LETTER SHORT I
257							"\xEB" => "\xCB", # CYRILLIC CAPITAL LETTER KA --> CYRILLIC SMALL LETTER KA
258							"\xEC" => "\xCC", # CYRILLIC CAPITAL LETTER EL --> CYRILLIC SMALL LETTER EL
259							"\xED" => "\xCD", # CYRILLIC CAPITAL LETTER EM --> CYRILLIC SMALL LETTER EM
260							"\xEE" => "\xCE", # CYRILLIC CAPITAL LETTER EN --> CYRILLIC SMALL LETTER EN
261							"\xEF" => "\xCF", # CYRILLIC CAPITAL LETTER O --> CYRILLIC SMALL LETTER O
262							"\xF0" => "\xD0", # CYRILLIC CAPITAL LETTER PE --> CYRILLIC SMALL LETTER PE
263							"\xF1" => "\xD1", # CYRILLIC CAPITAL LETTER YA --> CYRILLIC SMALL LETTER YA
264							"\xF2" => "\xD2", # CYRILLIC CAPITAL LETTER ER --> CYRILLIC SMALL LETTER ER
265							"\xF3" => "\xD3", # CYRILLIC CAPITAL LETTER ES --> CYRILLIC SMALL LETTER ES
266							"\xF4" => "\xD4", # CYRILLIC CAPITAL LETTER TE --> CYRILLIC SMALL LETTER TE
267							"\xF5" => "\xD5", # CYRILLIC CAPITAL LETTER U --> CYRILLIC SMALL LETTER U
268							"\xF6" => "\xD6", # CYRILLIC CAPITAL LETTER ZHE --> CYRILLIC SMALL LETTER ZHE
269							"\xF7" => "\xD7", # CYRILLIC CAPITAL LETTER VE --> CYRILLIC SMALL LETTER VE
270							"\xF8" => "\xD8", # CYRILLIC CAPITAL LETTER SOFT SIGN --> CYRILLIC SMALL LETTER SOFT SIGN
271							"\xF9" => "\xD9", # CYRILLIC CAPITAL LETTER YERU --> CYRILLIC SMALL LETTER YERU
272							"\xFA" => "\xDA", # CYRILLIC CAPITAL LETTER ZE --> CYRILLIC SMALL LETTER ZE
273							"\xFB" => "\xDB", # CYRILLIC CAPITAL LETTER SHA --> CYRILLIC SMALL LETTER SHA
274							"\xFC" => "\xDC", # CYRILLIC CAPITAL LETTER E --> CYRILLIC SMALL LETTER E
275							"\xFD" => "\xDD", # CYRILLIC CAPITAL LETTER SHCHA --> CYRILLIC SMALL LETTER SHCHA
276							"\xFE" => "\xDE", # CYRILLIC CAPITAL LETTER CHE --> CYRILLIC SMALL LETTER CHE
277							"\xFF" => "\xDF", # CYRILLIC CAPITAL LETTER HARD SIGN --> CYRILLIC SMALL LETTER HARD SIGN
278							);
279							}
280
281							else {
282							croak "Don't know my package name '@{[__PACKAGE__]}'";
283							}
284
285							#
286							# @ARGV wildcard globbing
287							#
288							sub import {
289
290	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
291	0					0	my @argv = ();
292	0					0	for (@ARGV) {
293
294							# has space
295	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
296	0	0				0	if (my @glob = Char::Ekoi8r::glob(qq{"$_"})) {
297	0					0	push @argv, @glob;
298							}
299							else {
300	0					0	push @argv, $_;
301							}
302							}
303
304							# has wildcard metachar
305							elsif (/\A (?:$q_char)? [?] /oxms) {
306	0	0				0	if (my @glob = Char::Ekoi8r::glob($_)) {
307	0					0	push @argv, @glob;
308							}
309							else {
310	0					0	push @argv, $_;
311							}
312							}
313
314							# no wildcard globbing
315							else {
316	0					0	push @argv, $_;
317							}
318							}
319	0					0	@ARGV = @argv;
320							}
321							}
322
323							# P.230 Care with Prototypes
324							# in Chapter 6: Subroutines
325							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
326							#
327							# If you aren't careful, you can get yourself into trouble with prototypes.
328							# But if you are careful, you can do a lot of neat things with them. This is
329							# all very powerful, of course, and should only be used in moderation to make
330							# the world a better place.
331
332							# P.332 Care with Prototypes
333							# in Chapter 7: Subroutines
334							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
335							#
336							# If you aren't careful, you can get yourself into trouble with prototypes.
337							# But if you are careful, you can do a lot of neat things with them. This is
338							# all very powerful, of course, and should only be used in moderation to make
339							# the world a better place.
340
341							#
342							# Prototypes of subroutines
343							#
344	0			0		0	sub unimport {}
345							sub Char::Ekoi8r::split(;$$$);
346							sub Char::Ekoi8r::tr($$$$;$);
347							sub Char::Ekoi8r::chop(@);
348							sub Char::Ekoi8r::index($$;$);
349							sub Char::Ekoi8r::rindex($$;$);
350							sub Char::Ekoi8r::lcfirst(@);
351							sub Char::Ekoi8r::lcfirst_();
352							sub Char::Ekoi8r::lc(@);
353							sub Char::Ekoi8r::lc_();
354							sub Char::Ekoi8r::ucfirst(@);
355							sub Char::Ekoi8r::ucfirst_();
356							sub Char::Ekoi8r::uc(@);
357							sub Char::Ekoi8r::uc_();
358							sub Char::Ekoi8r::fc(@);
359							sub Char::Ekoi8r::fc_();
360							sub Char::Ekoi8r::ignorecase;
361							sub Char::Ekoi8r::classic_character_class;
362							sub Char::Ekoi8r::capture;
363							sub Char::Ekoi8r::chr(;$);
364							sub Char::Ekoi8r::chr_();
365							sub Char::Ekoi8r::glob($);
366							sub Char::Ekoi8r::glob_();
367
368							sub Char::KOI8R::ord(;$);
369							sub Char::KOI8R::ord_();
370							sub Char::KOI8R::reverse(@);
371							sub Char::KOI8R::getc(;*@);
372							sub Char::KOI8R::length(;$);
373							sub Char::KOI8R::substr($$;$$);
374							sub Char::KOI8R::index($$;$);
375							sub Char::KOI8R::rindex($$;$);
376
377							#
378							# Regexp work
379							#
380	177			177		14637	BEGIN { eval q{ use vars qw(
	177			177		1178
	177					366
	177					87050
381							$Char::KOI8R::re_a
382							$Char::KOI8R::re_t
383							$Char::KOI8R::re_n
384							$Char::KOI8R::re_r
385							) } }
386
387							#
388							# Character class
389							#
390	177			177		13011	BEGIN { eval q{ use vars qw(
	177			177		1196
	177					375
	177					3120953
391							$dot
392							$dot_s
393							$eD
394							$eS
395							$eW
396							$eH
397							$eV
398							$eR
399							$eN
400							$not_alnum
401							$not_alpha
402							$not_ascii
403							$not_blank
404							$not_cntrl
405							$not_digit
406							$not_graph
407							$not_lower
408							$not_lower_i
409							$not_print
410							$not_punct
411							$not_space
412							$not_upper
413							$not_upper_i
414							$not_word
415							$not_xdigit
416							$eb
417							$eB
418							) } }
419
420							${Char::Ekoi8r::dot} = qr{(?:[^\x0A])};
421							${Char::Ekoi8r::dot_s} = qr{(?:[\x00-\xFF])};
422							${Char::Ekoi8r::eD} = qr{(?:[^0-9])};
423
424							# Vertical tabs are now whitespace
425							# \s in a regex now matches a vertical tab in all circumstances.
426							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
427							# ${Char::Ekoi8r::eS} = qr{(?:[^\x09\x0A \x0C\x0D\x20])};
428							# ${Char::Ekoi8r::eS} = qr{(?:[^\x09\x0A\x0B\x0C\x0D\x20])};
429							${Char::Ekoi8r::eS} = qr{(?:[^\s])};
430
431							${Char::Ekoi8r::eW} = qr{(?:[^0-9A-Z_a-z])};
432							${Char::Ekoi8r::eH} = qr{(?:[^\x09\x20])};
433							${Char::Ekoi8r::eV} = qr{(?:[^\x0A\x0B\x0C\x0D])};
434							${Char::Ekoi8r::eR} = qr{(?:\x0D\x0A\|[\x0A\x0D])};
435							${Char::Ekoi8r::eN} = qr{(?:[^\x0A])};
436							${Char::Ekoi8r::not_alnum} = qr{(?:[^\x30-\x39\x41-\x5A\x61-\x7A])};
437							${Char::Ekoi8r::not_alpha} = qr{(?:[^\x41-\x5A\x61-\x7A])};
438							${Char::Ekoi8r::not_ascii} = qr{(?:[^\x00-\x7F])};
439							${Char::Ekoi8r::not_blank} = qr{(?:[^\x09\x20])};
440							${Char::Ekoi8r::not_cntrl} = qr{(?:[^\x00-\x1F\x7F])};
441							${Char::Ekoi8r::not_digit} = qr{(?:[^\x30-\x39])};
442							${Char::Ekoi8r::not_graph} = qr{(?:[^\x21-\x7F])};
443							${Char::Ekoi8r::not_lower} = qr{(?:[^\x61-\x7A])};
444							${Char::Ekoi8r::not_lower_i} = qr{(?:[\x00-\xFF])};
445							${Char::Ekoi8r::not_print} = qr{(?:[^\x20-\x7F])};
446							${Char::Ekoi8r::not_punct} = qr{(?:[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
447							${Char::Ekoi8r::not_space} = qr{(?:[^\s\x0B])};
448							${Char::Ekoi8r::not_upper} = qr{(?:[^\x41-\x5A])};
449							${Char::Ekoi8r::not_upper_i} = qr{(?:[\x00-\xFF])};
450							${Char::Ekoi8r::not_word} = qr{(?:[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
451							${Char::Ekoi8r::not_xdigit} = qr{(?:[^\x30-\x39\x41-\x46\x61-\x66])};
452							${Char::Ekoi8r::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
453							${Char::Ekoi8r::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
454
455							# avoid: Name "Char::Ekoi8r::foo" used only once: possible typo at here.
456							${Char::Ekoi8r::dot} = ${Char::Ekoi8r::dot};
457							${Char::Ekoi8r::dot_s} = ${Char::Ekoi8r::dot_s};
458							${Char::Ekoi8r::eD} = ${Char::Ekoi8r::eD};
459							${Char::Ekoi8r::eS} = ${Char::Ekoi8r::eS};
460							${Char::Ekoi8r::eW} = ${Char::Ekoi8r::eW};
461							${Char::Ekoi8r::eH} = ${Char::Ekoi8r::eH};
462							${Char::Ekoi8r::eV} = ${Char::Ekoi8r::eV};
463							${Char::Ekoi8r::eR} = ${Char::Ekoi8r::eR};
464							${Char::Ekoi8r::eN} = ${Char::Ekoi8r::eN};
465							${Char::Ekoi8r::not_alnum} = ${Char::Ekoi8r::not_alnum};
466							${Char::Ekoi8r::not_alpha} = ${Char::Ekoi8r::not_alpha};
467							${Char::Ekoi8r::not_ascii} = ${Char::Ekoi8r::not_ascii};
468							${Char::Ekoi8r::not_blank} = ${Char::Ekoi8r::not_blank};
469							${Char::Ekoi8r::not_cntrl} = ${Char::Ekoi8r::not_cntrl};
470							${Char::Ekoi8r::not_digit} = ${Char::Ekoi8r::not_digit};
471							${Char::Ekoi8r::not_graph} = ${Char::Ekoi8r::not_graph};
472							${Char::Ekoi8r::not_lower} = ${Char::Ekoi8r::not_lower};
473							${Char::Ekoi8r::not_lower_i} = ${Char::Ekoi8r::not_lower_i};
474							${Char::Ekoi8r::not_print} = ${Char::Ekoi8r::not_print};
475							${Char::Ekoi8r::not_punct} = ${Char::Ekoi8r::not_punct};
476							${Char::Ekoi8r::not_space} = ${Char::Ekoi8r::not_space};
477							${Char::Ekoi8r::not_upper} = ${Char::Ekoi8r::not_upper};
478							${Char::Ekoi8r::not_upper_i} = ${Char::Ekoi8r::not_upper_i};
479							${Char::Ekoi8r::not_word} = ${Char::Ekoi8r::not_word};
480							${Char::Ekoi8r::not_xdigit} = ${Char::Ekoi8r::not_xdigit};
481							${Char::Ekoi8r::eb} = ${Char::Ekoi8r::eb};
482							${Char::Ekoi8r::eB} = ${Char::Ekoi8r::eB};
483
484							#
485							# KOI8-R split
486							#
487							sub Char::Ekoi8r::split(;$$$) {
488
489							# P.794 29.2.161. split
490							# in Chapter 29: Functions
491							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
492
493							# P.951 split
494							# in Chapter 27: Functions
495							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
496
497	0			0	0	0	my $pattern = $_[0];
498	0					0	my $string = $_[1];
499	0					0	my $limit = $_[2];
500
501							# if $pattern is also omitted or is the literal space, " "
502	0	0				0	if (not defined $pattern) {
503	0					0	$pattern = ' ';
504							}
505
506							# if $string is omitted, the function splits the $_ string
507	0	0				0	if (not defined $string) {
508	0	0				0	if (defined $_) {
509	0					0	$string = $_;
510							}
511							else {
512	0					0	$string = '';
513							}
514							}
515
516	0					0	my @split = ();
517
518							# when string is empty
519	0	0				0	if ($string eq '') {
		0
520
521							# resulting list value in list context
522	0	0				0	if (wantarray) {
523	0					0	return @split;
524							}
525
526							# count of substrings in scalar context
527							else {
528	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
529	0					0	@_ = @split;
530	0					0	return scalar @_;
531							}
532							}
533
534							# split's first argument is more consistently interpreted
535							#
536							# After some changes earlier in v5.17, split's behavior has been simplified:
537							# if the PATTERN argument evaluates to a string containing one space, it is
538							# treated the way that a literal string containing one space once was.
539							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
540
541							# if $pattern is also omitted or is the literal space, " ", the function splits
542							# on whitespace, /\s+/, after skipping any leading whitespace
543							# (and so on)
544
545							elsif ($pattern eq ' ') {
546	0	0				0	if (not defined $limit) {
547	0					0	return CORE::split(' ', $string);
548							}
549							else {
550	0					0	return CORE::split(' ', $string, $limit);
551							}
552							}
553
554							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
555	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
556
557							# a pattern capable of matching either the null string or something longer than the
558							# null string will split the value of $string into separate characters wherever it
559							# matches the null string between characters
560							# (and so on)
561
562	0	0				0	if ('' =~ / \A $pattern \z /xms) {
563	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
564	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
565
566							# P.1024 Appendix W.10 Multibyte Processing
567							# of ISBN 1-56592-224-7 CJKV Information Processing
568							# (and so on)
569
570							# the //m modifier is assumed when you split on the pattern /^/
571							# (and so on)
572
573							# V
574	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
575
576							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
577							# is included in the resulting list, interspersed with the fields that are ordinarily returned
578							# (and so on)
579
580	0					0	local $@;
581	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
582	0					0	push @split, eval('$' . $digit);
583							}
584							}
585							}
586
587							else {
588	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
589
590							# V
591	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
592	0					0	local $@;
593	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
594	0					0	push @split, eval('$' . $digit);
595							}
596							}
597							}
598							}
599
600							elsif ($limit > 0) {
601	0	0				0	if ('' =~ / \A $pattern \z /xms) {
602	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
603	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
604
605							# V
606	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
607	0					0	local $@;
608	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
609	0					0	push @split, eval('$' . $digit);
610							}
611							}
612							}
613							}
614							else {
615	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
616	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
617
618							# V
619	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
620	0					0	local $@;
621	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
622	0					0	push @split, eval('$' . $digit);
623							}
624							}
625							}
626							}
627							}
628
629	0	0				0	if (CORE::length($string) > 0) {
630	0					0	push @split, $string;
631							}
632
633							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
634	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
635	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
636	0					0	pop @split;
637							}
638							}
639
640							# resulting list value in list context
641	0	0				0	if (wantarray) {
642	0					0	return @split;
643							}
644
645							# count of substrings in scalar context
646							else {
647	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
648	0					0	@_ = @split;
649	0					0	return scalar @_;
650							}
651							}
652
653							#
654							# get last subexpression offsets
655							#
656							sub _last_subexpression_offsets {
657	0			0		0	my $pattern = $_[0];
658
659							# remove comment
660	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
661
662	0					0	my $modifier = '';
663	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
664	0					0	$modifier = $1;
665	0					0	$modifier =~ s/-[A-Za-z]*//;
666							}
667
668							# with /x modifier
669	0					0	my @char = ();
670	0	0				0	if ($modifier =~ /x/oxms) {
671	0					0	@char = $pattern =~ /\G(
672							\\ (?:$q_char) \|
673							\# (?:$q_char)*? $ \|
674							\[ (?: \\\] \| (?:$q_char))+? \] \|
675							\(\? \|
676							(?:$q_char)
677							)/oxmsg;
678							}
679
680							# without /x modifier
681							else {
682	0					0	@char = $pattern =~ /\G(
683							\\ (?:$q_char) \|
684							\[ (?: \\\] \| (?:$q_char))+? \] \|
685							\(\? \|
686							(?:$q_char)
687							)/oxmsg;
688							}
689
690	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
691							}
692
693							#
694							# KOI8-R transliteration (tr///)
695							#
696							sub Char::Ekoi8r::tr($$$$;$) {
697
698	0			0	0	0	my $bind_operator = $_[1];
699	0					0	my $searchlist = $_[2];
700	0					0	my $replacementlist = $_[3];
701	0		0			0	my $modifier = $_[4] \|\| '';
702
703	0	0				0	if ($modifier =~ /r/oxms) {
704	0	0				0	if ($bind_operator =~ / !~ /oxms) {
705	0					0	croak "Using !~ with tr///r doesn't make sense";
706							}
707							}
708
709	0					0	my @char = $_[0] =~ /\G ($q_char) /oxmsg;
710	0					0	my @searchlist = _charlist_tr($searchlist);
711	0					0	my @replacementlist = _charlist_tr($replacementlist);
712
713	0					0	my %tr = ();
714	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
715	0	0				0	if (not exists $tr{$searchlist[$i]}) {
716	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
717	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
718							}
719							elsif ($modifier =~ /d/oxms) {
720	0					0	$tr{$searchlist[$i]} = '';
721							}
722							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
723	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
724							}
725							else {
726	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
727							}
728							}
729							}
730
731	0					0	my $tr = 0;
732	0					0	my $replaced = '';
733	0	0				0	if ($modifier =~ /c/oxms) {
734	0					0	while (defined(my $char = shift @char)) {
735	0	0				0	if (not exists $tr{$char}) {
736	0	0				0	if (defined $replacementlist[0]) {
737	0					0	$replaced .= $replacementlist[0];
738							}
739	0					0	$tr++;
740	0	0				0	if ($modifier =~ /s/oxms) {
741	0		0			0	while (@char and (not exists $tr{$char[0]})) {
742	0					0	shift @char;
743	0					0	$tr++;
744							}
745							}
746							}
747							else {
748	0					0	$replaced .= $char;
749							}
750							}
751							}
752							else {
753	0					0	while (defined(my $char = shift @char)) {
754	0	0				0	if (exists $tr{$char}) {
755	0					0	$replaced .= $tr{$char};
756	0					0	$tr++;
757	0	0				0	if ($modifier =~ /s/oxms) {
758	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
759	0					0	shift @char;
760	0					0	$tr++;
761							}
762							}
763							}
764							else {
765	0					0	$replaced .= $char;
766							}
767							}
768							}
769
770	0	0				0	if ($modifier =~ /r/oxms) {
771	0					0	return $replaced;
772							}
773							else {
774	0					0	$_[0] = $replaced;
775	0	0				0	if ($bind_operator =~ / !~ /oxms) {
776	0					0	return not $tr;
777							}
778							else {
779	0					0	return $tr;
780							}
781							}
782							}
783
784							#
785							# KOI8-R chop
786							#
787							sub Char::Ekoi8r::chop(@) {
788
789	0			0	0	0	my $chop;
790	0	0				0	if (@_ == 0) {
791	0					0	my @char = /\G ($q_char) /oxmsg;
792	0					0	$chop = pop @char;
793	0					0	$_ = join '', @char;
794							}
795							else {
796	0					0	for (@_) {
797	0					0	my @char = /\G ($q_char) /oxmsg;
798	0					0	$chop = pop @char;
799	0					0	$_ = join '', @char;
800							}
801							}
802	0					0	return $chop;
803							}
804
805							#
806							# KOI8-R index by octet
807							#
808							sub Char::Ekoi8r::index($$;$) {
809
810	0			0	1	0	my($str,$substr,$position) = @_;
811	0		0			0	$position \|\|= 0;
812	0					0	my $pos = 0;
813
814	0					0	while ($pos < CORE::length($str)) {
815	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
816	0	0				0	if ($pos >= $position) {
817	0					0	return $pos;
818							}
819							}
820	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
821	0					0	$pos += CORE::length($1);
822							}
823							else {
824	0					0	$pos += 1;
825							}
826							}
827	0					0	return -1;
828							}
829
830							#
831							# KOI8-R reverse index
832							#
833							sub Char::Ekoi8r::rindex($$;$) {
834
835	0			0	0	0	my($str,$substr,$position) = @_;
836	0		0			0	$position \|\|= CORE::length($str) - 1;
837	0					0	my $pos = 0;
838	0					0	my $rindex = -1;
839
840	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
841	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
842	0					0	$rindex = $pos;
843							}
844	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
845	0					0	$pos += CORE::length($1);
846							}
847							else {
848	0					0	$pos += 1;
849							}
850							}
851	0					0	return $rindex;
852							}
853
854							#
855							# KOI8-R lower case first with parameter
856							#
857							sub Char::Ekoi8r::lcfirst(@) {
858	0	0		0	0	0	if (@_) {
859	0					0	my $s = shift @_;
860	0	0	0			0	if (@_ and wantarray) {
861	0					0	return Char::Ekoi8r::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
862							}
863							else {
864	0					0	return Char::Ekoi8r::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
865							}
866							}
867							else {
868	0					0	return Char::Ekoi8r::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
869							}
870							}
871
872							#
873							# KOI8-R lower case first without parameter
874							#
875							sub Char::Ekoi8r::lcfirst_() {
876	0			0	0	0	return Char::Ekoi8r::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
877							}
878
879							#
880							# KOI8-R lower case with parameter
881							#
882							sub Char::Ekoi8r::lc(@) {
883	0	0		0	0	0	if (@_) {
884	0					0	my $s = shift @_;
885	0	0	0			0	if (@_ and wantarray) {
886	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
887							}
888							else {
889	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
890							}
891							}
892							else {
893	0					0	return Char::Ekoi8r::lc_();
894							}
895							}
896
897							#
898							# KOI8-R lower case without parameter
899							#
900							sub Char::Ekoi8r::lc_() {
901	0			0	0	0	my $s = $_;
902	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
903							}
904
905							#
906							# KOI8-R upper case first with parameter
907							#
908							sub Char::Ekoi8r::ucfirst(@) {
909	0	0		0	0	0	if (@_) {
910	0					0	my $s = shift @_;
911	0	0	0			0	if (@_ and wantarray) {
912	0					0	return Char::Ekoi8r::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
913							}
914							else {
915	0					0	return Char::Ekoi8r::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
916							}
917							}
918							else {
919	0					0	return Char::Ekoi8r::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
920							}
921							}
922
923							#
924							# KOI8-R upper case first without parameter
925							#
926							sub Char::Ekoi8r::ucfirst_() {
927	0			0	0	0	return Char::Ekoi8r::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
928							}
929
930							#
931							# KOI8-R upper case with parameter
932							#
933							sub Char::Ekoi8r::uc(@) {
934	0	0		0	0	0	if (@_) {
935	0					0	my $s = shift @_;
936	0	0	0			0	if (@_ and wantarray) {
937	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
938							}
939							else {
940	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
941							}
942							}
943							else {
944	0					0	return Char::Ekoi8r::uc_();
945							}
946							}
947
948							#
949							# KOI8-R upper case without parameter
950							#
951							sub Char::Ekoi8r::uc_() {
952	0			0	0	0	my $s = $_;
953	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
954							}
955
956							#
957							# KOI8-R fold case with parameter
958							#
959							sub Char::Ekoi8r::fc(@) {
960	0	0		0	0	0	if (@_) {
961	0					0	my $s = shift @_;
962	0	0	0			0	if (@_ and wantarray) {
963	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
964							}
965							else {
966	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
967							}
968							}
969							else {
970	0					0	return Char::Ekoi8r::fc_();
971							}
972							}
973
974							#
975							# KOI8-R fold case without parameter
976							#
977							sub Char::Ekoi8r::fc_() {
978	0			0	0	0	my $s = $_;
979	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
980							}
981
982							#
983							# KOI8-R regexp capture
984							#
985							{
986							sub Char::Ekoi8r::capture {
987	0			0	1	0	return $_[0];
988							}
989							}
990
991							#
992							# KOI8-R regexp ignore case modifier
993							#
994							sub Char::Ekoi8r::ignorecase {
995
996	0			0	0	0	my @string = @_;
997	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
998
999							# ignore case of $scalar or @array
1000	0					0	for my $string (@string) {
1001
1002							# split regexp
1003	0					0	my @char = $string =~ /\G(
1004							\[\^ \|
1005							\\? (?:$q_char)
1006							)/oxmsg;
1007
1008							# unescape character
1009	0					0	for (my $i=0; $i <= $#char; $i++) {
1010	0	0				0	next if not defined $char[$i];
1011
1012							# open character class [...]
1013	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
1014	0					0	my $left = $i;
1015
1016							# [] make die "unmatched [] in regexp ..."
1017
1018	0	0				0	if ($char[$i+1] eq ']') {
1019	0					0	$i++;
1020							}
1021
1022	0					0	while (1) {
1023	0	0				0	if (++$i > $#char) {
1024	0					0	croak "Unmatched [] in regexp";
1025							}
1026	0	0				0	if ($char[$i] eq ']') {
1027	0					0	my $right = $i;
1028	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
1029
1030							# escape character
1031	0					0	for my $char (@charlist) {
1032	0	0				0	if (0) {
1033							}
1034
1035	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1036	0					0	$char = $1 . '\\' . $char;
1037							}
1038							}
1039
1040							# [...]
1041	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
1042
1043	0					0	$i = $left;
1044	0					0	last;
1045							}
1046							}
1047							}
1048
1049							# open character class [^...]
1050							elsif ($char[$i] eq '[^') {
1051	0					0	my $left = $i;
1052
1053							# [^] make die "unmatched [] in regexp ..."
1054
1055	0	0				0	if ($char[$i+1] eq ']') {
1056	0					0	$i++;
1057							}
1058
1059	0					0	while (1) {
1060	0	0				0	if (++$i > $#char) {
1061	0					0	croak "Unmatched [] in regexp";
1062							}
1063	0	0				0	if ($char[$i] eq ']') {
1064	0					0	my $right = $i;
1065	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
1066
1067							# escape character
1068	0					0	for my $char (@charlist) {
1069	0	0				0	if (0) {
1070							}
1071
1072	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1073	0					0	$char = '\\' . $char;
1074							}
1075							}
1076
1077							# [^...]
1078	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
1079
1080	0					0	$i = $left;
1081	0					0	last;
1082							}
1083							}
1084							}
1085
1086							# rewrite classic character class or escape character
1087							elsif (my $char = classic_character_class($char[$i])) {
1088	0					0	$char[$i] = $char;
1089							}
1090
1091							# with /i modifier
1092							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1093	0					0	my $uc = Char::Ekoi8r::uc($char[$i]);
1094	0					0	my $fc = Char::Ekoi8r::fc($char[$i]);
1095	0	0				0	if ($uc ne $fc) {
1096	0	0				0	if (CORE::length($fc) == 1) {
1097	0					0	$char[$i] = '[' . $uc . $fc . ']';
1098							}
1099							else {
1100	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
1101							}
1102							}
1103							}
1104							}
1105
1106							# characterize
1107	0					0	for (my $i=0; $i <= $#char; $i++) {
1108	0	0				0	next if not defined $char[$i];
1109
1110	0	0				0	if (0) {
1111							}
1112
1113							# quote character before ? + * {
1114	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
1115	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
1116	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
1117							}
1118							}
1119							}
1120
1121	0					0	$string = join '', @char;
1122							}
1123
1124							# make regexp string
1125	0					0	return @string;
1126							}
1127
1128							#
1129							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
1130							#
1131							sub Char::Ekoi8r::classic_character_class {
1132	0			0	0	0	my($char) = @_;
1133
1134							return {
1135	0		0			0	'\D' => '${Char::Ekoi8r::eD}',
1136							'\S' => '${Char::Ekoi8r::eS}',
1137							'\W' => '${Char::Ekoi8r::eW}',
1138							'\d' => '[0-9]',
1139
1140							# Before Perl 5.6, \s only matched the five whitespace characters
1141							# tab, newline, form-feed, carriage return, and the space character
1142							# itself, which, taken together, is the character class [\t\n\f\r ].
1143
1144							# Vertical tabs are now whitespace
1145							# \s in a regex now matches a vertical tab in all circumstances.
1146							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1147							# \t \n \v \f \r space
1148							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1149							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1150							'\s' => '\s',
1151
1152							'\w' => '[0-9A-Z_a-z]',
1153							'\C' => '[\x00-\xFF]',
1154							'\X' => 'X',
1155
1156							# \h \v \H \V
1157
1158							# P.114 Character Class Shortcuts
1159							# in Chapter 7: In the World of Regular Expressions
1160							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
1161
1162							# P.357 13.2.3 Whitespace
1163							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
1164							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1165							#
1166							# 0x00009 CHARACTER TABULATION h s
1167							# 0x0000a LINE FEED (LF) vs
1168							# 0x0000b LINE TABULATION v
1169							# 0x0000c FORM FEED (FF) vs
1170							# 0x0000d CARRIAGE RETURN (CR) vs
1171							# 0x00020 SPACE h s
1172
1173							# P.196 Table 5-9. Alphanumeric regex metasymbols
1174							# in Chapter 5. Pattern Matching
1175							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1176
1177							# (and so on)
1178
1179							'\H' => '${Char::Ekoi8r::eH}',
1180							'\V' => '${Char::Ekoi8r::eV}',
1181							'\h' => '[\x09\x20]',
1182							'\v' => '[\x0A\x0B\x0C\x0D]',
1183							'\R' => '${Char::Ekoi8r::eR}',
1184
1185							# \N
1186							#
1187							# http://perldoc.perl.org/perlre.html
1188							# Character Classes and other Special Escapes
1189							# Any character but \n (experimental). Not affected by /s modifier
1190
1191							'\N' => '${Char::Ekoi8r::eN}',
1192
1193							# \b \B
1194
1195							# P.180 Boundaries: The \b and \B Assertions
1196							# in Chapter 5: Pattern Matching
1197							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1198
1199							# P.219 Boundaries: The \b and \B Assertions
1200							# in Chapter 5: Pattern Matching
1201							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1202
1203							# '\b' => '(?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))',
1204							'\b' => '${Char::Ekoi8r::eb}',
1205
1206							# '\B' => '(?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))',
1207							'\B' => '${Char::Ekoi8r::eB}',
1208
1209							}->{$char} \|\| '';
1210							}
1211
1212							#
1213							# prepare KOI8-R characters per length
1214							#
1215
1216							# 1 octet characters
1217							my @chars1 = ();
1218							sub chars1 {
1219	0	0		0	0	0	if (@chars1) {
1220	0					0	return @chars1;
1221							}
1222	0	0				0	if (exists $range_tr{1}) {
1223	0					0	my @ranges = @{ $range_tr{1} };
	0					0
1224	0					0	while (my @range = splice(@ranges,0,1)) {
1225	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1226	0					0	push @chars1, pack 'C', $oct0;
1227							}
1228							}
1229							}
1230	0					0	return @chars1;
1231							}
1232
1233							# 2 octets characters
1234							my @chars2 = ();
1235							sub chars2 {
1236	0	0		0	0	0	if (@chars2) {
1237	0					0	return @chars2;
1238							}
1239	0	0				0	if (exists $range_tr{2}) {
1240	0					0	my @ranges = @{ $range_tr{2} };
	0					0
1241	0					0	while (my @range = splice(@ranges,0,2)) {
1242	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1243	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1244	0					0	push @chars2, pack 'CC', $oct0,$oct1;
1245							}
1246							}
1247							}
1248							}
1249	0					0	return @chars2;
1250							}
1251
1252							# 3 octets characters
1253							my @chars3 = ();
1254							sub chars3 {
1255	0	0		0	0	0	if (@chars3) {
1256	0					0	return @chars3;
1257							}
1258	0	0				0	if (exists $range_tr{3}) {
1259	0					0	my @ranges = @{ $range_tr{3} };
	0					0
1260	0					0	while (my @range = splice(@ranges,0,3)) {
1261	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1262	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1263	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1264	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
1265							}
1266							}
1267							}
1268							}
1269							}
1270	0					0	return @chars3;
1271							}
1272
1273							# 4 octets characters
1274							my @chars4 = ();
1275							sub chars4 {
1276	0	0		0	0	0	if (@chars4) {
1277	0					0	return @chars4;
1278							}
1279	0	0				0	if (exists $range_tr{4}) {
1280	0					0	my @ranges = @{ $range_tr{4} };
	0					0
1281	0					0	while (my @range = splice(@ranges,0,4)) {
1282	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1283	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1284	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1285	0					0	for my $oct3 (@{$range[3]}) {
	0					0
1286	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
1287							}
1288							}
1289							}
1290							}
1291							}
1292							}
1293	0					0	return @chars4;
1294							}
1295
1296							#
1297							# KOI8-R open character list for tr
1298							#
1299							sub _charlist_tr {
1300
1301	0			0		0	local $_ = shift @_;
1302
1303							# unescape character
1304	0					0	my @char = ();
1305	0					0	while (not /\G \z/oxmsgc) {
1306	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
1307	0					0	push @char, '\-';
1308							}
1309							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
1310	0					0	push @char, CORE::chr(oct $1);
1311							}
1312							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
1313	0					0	push @char, CORE::chr(hex $1);
1314							}
1315							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
1316	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
1317							}
1318							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
1319	0					0	push @char, {
1320							'\0' => "\0",
1321							'\n' => "\n",
1322							'\r' => "\r",
1323							'\t' => "\t",
1324							'\f' => "\f",
1325							'\b' => "\x08", # \b means backspace in character class
1326							'\a' => "\a",
1327							'\e' => "\e",
1328							}->{$1};
1329							}
1330							elsif (/\G \\ ($q_char) /oxmsgc) {
1331	0					0	push @char, $1;
1332							}
1333							elsif (/\G ($q_char) /oxmsgc) {
1334	0					0	push @char, $1;
1335							}
1336							}
1337
1338							# join separated multiple-octet
1339	0					0	@char = join('',@char) =~ /\G (\\-\|$q_char) /oxmsg;
1340
1341							# unescape '-'
1342	0					0	my @i = ();
1343	0					0	for my $i (0 .. $#char) {
1344	0	0				0	if ($char[$i] eq '\-') {
		0
1345	0					0	$char[$i] = '-';
1346							}
1347							elsif ($char[$i] eq '-') {
1348	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1349	0					0	push @i, $i;
1350							}
1351							}
1352							}
1353
1354							# open character list (reverse for splice)
1355	0					0	for my $i (CORE::reverse @i) {
1356	0					0	my @range = ();
1357
1358							# range error
1359	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
1360	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1361							}
1362
1363							# range of multiple-octet code
1364	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
1365	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
1366	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
1367							}
1368							elsif (CORE::length($char[$i+1]) == 2) {
1369	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1370	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
1371							}
1372							elsif (CORE::length($char[$i+1]) == 3) {
1373	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1374	0					0	push @range, chars2();
1375	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1376							}
1377							elsif (CORE::length($char[$i+1]) == 4) {
1378	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1379	0					0	push @range, chars2();
1380	0					0	push @range, chars3();
1381	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1382							}
1383							else {
1384	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1385							}
1386							}
1387							elsif (CORE::length($char[$i-1]) == 2) {
1388	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
1389	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
1390							}
1391							elsif (CORE::length($char[$i+1]) == 3) {
1392	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1393	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1394							}
1395							elsif (CORE::length($char[$i+1]) == 4) {
1396	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1397	0					0	push @range, chars3();
1398	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1399							}
1400							else {
1401	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1402							}
1403							}
1404							elsif (CORE::length($char[$i-1]) == 3) {
1405	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
1406	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
1407							}
1408							elsif (CORE::length($char[$i+1]) == 4) {
1409	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
1410	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1411							}
1412							else {
1413	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1414							}
1415							}
1416							elsif (CORE::length($char[$i-1]) == 4) {
1417	0	0				0	if (CORE::length($char[$i+1]) == 4) {
1418	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
1419							}
1420							else {
1421	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1422							}
1423							}
1424							else {
1425	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1426							}
1427
1428	0					0	splice @char, $i-1, 3, @range;
1429							}
1430
1431	0					0	return @char;
1432							}
1433
1434							#
1435							# KOI8-R open character class
1436							#
1437							sub _cc {
1438	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
1439	0					0	die __FILE__, ": subroutine cc got no parameter.";
1440							}
1441							elsif (scalar(@_) == 1) {
1442	0					0	return sprintf('\x%02X',$_[0]);
1443							}
1444							elsif (scalar(@_) == 2) {
1445	0	0				0	if ($_[0] > $_[1]) {
		0
		0
1446	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).";
1447							}
1448							elsif ($_[0] == $_[1]) {
1449	0					0	return sprintf('\x%02X',$_[0]);
1450							}
1451							elsif (($_[0]+1) == $_[1]) {
1452	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
1453							}
1454							else {
1455	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
1456							}
1457							}
1458							else {
1459	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).";
	0					0
1460							}
1461							}
1462
1463							#
1464							# KOI8-R octet range
1465							#
1466							sub _octets {
1467	0			0		0	my $length = shift @_;
1468
1469	0	0				0	if ($length == 1) {
1470	0					0	my($a1) = unpack 'C', $_[0];
1471	0					0	my($z1) = unpack 'C', $_[1];
1472
1473	0	0				0	if ($a1 > $z1) {
1474	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
1475							}
1476
1477	0	0				0	if ($a1 == $z1) {
		0
1478	0					0	return sprintf('\x%02X',$a1);
1479							}
1480							elsif (($a1+1) == $z1) {
1481	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
1482							}
1483							else {
1484	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
1485							}
1486							}
1487							else {
1488	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).";
1489							}
1490							}
1491
1492							#
1493							# KOI8-R range regexp
1494							#
1495							sub _range_regexp {
1496	0			0		0	my($length,$first,$last) = @_;
1497
1498	0					0	my @range_regexp = ();
1499	0	0				0	if (not exists $range_tr{$length}) {
1500	0					0	return @range_regexp;
1501							}
1502
1503	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
1504	0					0	while (my @range = splice(@ranges,0,$length)) {
1505	0					0	my $min = '';
1506	0					0	my $max = '';
1507	0					0	for (my $i=0; $i < $length; $i++) {
1508	0					0	$min .= pack 'C', $range[$i][0];
1509	0					0	$max .= pack 'C', $range[$i][-1];
1510							}
1511
1512							# min___max
1513							# FIRST_____________LAST
1514							# (nothing)
1515
1516	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
1517							}
1518
1519							# **********
1520							# min_________max
1521							# FIRST_____________LAST
1522							# **********
1523
1524							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
1525	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
1526							}
1527
1528							# **********************
1529							# min________________max
1530							# FIRST_____________LAST
1531							# **********************
1532
1533							elsif (($min eq $first) and ($max eq $last)) {
1534	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1535							}
1536
1537							# *********
1538							# min___max
1539							# FIRST_____________LAST
1540							# *********
1541
1542							elsif (($first le $min) and ($max le $last)) {
1543	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
1544							}
1545
1546							# **********************
1547							# min__________________________max
1548							# FIRST_____________LAST
1549							# **********************
1550
1551							elsif (($min le $first) and ($last le $max)) {
1552	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1553							}
1554
1555							# *********
1556							# min________max
1557							# FIRST_____________LAST
1558							# *********
1559
1560							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
1561	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
1562							}
1563
1564							# min___max
1565							# FIRST_____________LAST
1566							# (nothing)
1567
1568							elsif ($last lt $min) {
1569							}
1570
1571							else {
1572	0					0	die __FILE__, ": subroutine _range_regexp panic.";
1573							}
1574							}
1575
1576	0					0	return @range_regexp;
1577							}
1578
1579							#
1580							# KOI8-R open character list for qr and not qr
1581							#
1582							sub _charlist {
1583
1584	0			0		0	my $modifier = pop @_;
1585	0					0	my @char = @_;
1586
1587	0	0				0	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
1588
1589							# unescape character
1590	0					0	for (my $i=0; $i <= $#char; $i++) {
1591
1592							# escape - to ...
1593	0	0	0			0	if ($char[$i] eq '-') {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
1594	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1595	0					0	$char[$i] = '...';
1596							}
1597							}
1598
1599							# octal escape sequence
1600							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
1601	0					0	$char[$i] = octchr($1);
1602							}
1603
1604							# hexadecimal escape sequence
1605							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
1606	0					0	$char[$i] = hexchr($1);
1607							}
1608
1609							# \N{CHARNAME} --> N\{CHARNAME}
1610							elsif ($char[$i] =~ /\A \\ (N) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1611	0					0	$char[$i] = $1 . '\\' . $2;
1612							}
1613
1614							# \p{PROPERTY} --> p\{PROPERTY}
1615							elsif ($char[$i] =~ /\A \\ (p) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1616	0					0	$char[$i] = $1 . '\\' . $2;
1617							}
1618
1619							# \P{PROPERTY} --> P\{PROPERTY}
1620							elsif ($char[$i] =~ /\A \\ (P) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1621	0					0	$char[$i] = $1 . '\\' . $2;
1622							}
1623
1624							# \p, \P, \X --> p, P, X
1625							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
1626	0					0	$char[$i] = $1;
1627							}
1628
1629							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
1630	0					0	$char[$i] = CORE::chr oct $1;
1631							}
1632							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
1633	0					0	$char[$i] = CORE::chr hex $1;
1634							}
1635							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
1636	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
1637							}
1638							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
1639	0					0	$char[$i] = {
1640							'\0' => "\0",
1641							'\n' => "\n",
1642							'\r' => "\r",
1643							'\t' => "\t",
1644							'\f' => "\f",
1645							'\b' => "\x08", # \b means backspace in character class
1646							'\a' => "\a",
1647							'\e' => "\e",
1648							'\d' => '[0-9]',
1649
1650							# Vertical tabs are now whitespace
1651							# \s in a regex now matches a vertical tab in all circumstances.
1652							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1653							# \t \n \v \f \r space
1654							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1655							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1656							'\s' => '\s',
1657
1658							'\w' => '[0-9A-Z_a-z]',
1659							'\D' => '${Char::Ekoi8r::eD}',
1660							'\S' => '${Char::Ekoi8r::eS}',
1661							'\W' => '${Char::Ekoi8r::eW}',
1662
1663							'\H' => '${Char::Ekoi8r::eH}',
1664							'\V' => '${Char::Ekoi8r::eV}',
1665							'\h' => '[\x09\x20]',
1666							'\v' => '[\x0A\x0B\x0C\x0D]',
1667							'\R' => '${Char::Ekoi8r::eR}',
1668
1669							}->{$1};
1670							}
1671
1672							# POSIX-style character classes
1673							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
1674	0					0	$char[$i] = {
1675
1676							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
1677							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
1678							'[:^lower:]' => '${Char::Ekoi8r::not_lower_i}',
1679							'[:^upper:]' => '${Char::Ekoi8r::not_upper_i}',
1680
1681							}->{$1};
1682							}
1683							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
1684	0					0	$char[$i] = {
1685
1686							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
1687							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
1688							'[:ascii:]' => '[\x00-\x7F]',
1689							'[:blank:]' => '[\x09\x20]',
1690							'[:cntrl:]' => '[\x00-\x1F\x7F]',
1691							'[:digit:]' => '[\x30-\x39]',
1692							'[:graph:]' => '[\x21-\x7F]',
1693							'[:lower:]' => '[\x61-\x7A]',
1694							'[:print:]' => '[\x20-\x7F]',
1695							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
1696
1697							# P.174 POSIX-Style Character Classes
1698							# in Chapter 5: Pattern Matching
1699							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1700
1701							# P.311 11.2.4 Character Classes and other Special Escapes
1702							# in Chapter 11: perlre: Perl regular expressions
1703							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1704
1705							# P.210 POSIX-Style Character Classes
1706							# in Chapter 5: Pattern Matching
1707							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1708
1709							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
1710
1711							'[:upper:]' => '[\x41-\x5A]',
1712							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
1713							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
1714							'[:^alnum:]' => '${Char::Ekoi8r::not_alnum}',
1715							'[:^alpha:]' => '${Char::Ekoi8r::not_alpha}',
1716							'[:^ascii:]' => '${Char::Ekoi8r::not_ascii}',
1717							'[:^blank:]' => '${Char::Ekoi8r::not_blank}',
1718							'[:^cntrl:]' => '${Char::Ekoi8r::not_cntrl}',
1719							'[:^digit:]' => '${Char::Ekoi8r::not_digit}',
1720							'[:^graph:]' => '${Char::Ekoi8r::not_graph}',
1721							'[:^lower:]' => '${Char::Ekoi8r::not_lower}',
1722							'[:^print:]' => '${Char::Ekoi8r::not_print}',
1723							'[:^punct:]' => '${Char::Ekoi8r::not_punct}',
1724							'[:^space:]' => '${Char::Ekoi8r::not_space}',
1725							'[:^upper:]' => '${Char::Ekoi8r::not_upper}',
1726							'[:^word:]' => '${Char::Ekoi8r::not_word}',
1727							'[:^xdigit:]' => '${Char::Ekoi8r::not_xdigit}',
1728
1729							}->{$1};
1730							}
1731							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
1732	0					0	$char[$i] = $1;
1733							}
1734							}
1735
1736							# open character list
1737	0					0	my @singleoctet = ();
1738	0					0	my @multipleoctet = ();
1739	0					0	for (my $i=0; $i <= $#char; ) {
1740
1741							# escaped -
1742	0	0	0			0	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		0
		0
		0
		0
		0
1743	0					0	$i += 1;
1744	0					0	next;
1745							}
1746
1747							# make range regexp
1748							elsif ($char[$i] eq '...') {
1749
1750							# range error
1751	0	0				0	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		0
1752	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1753							}
1754							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
1755	0	0				0	if ($char[$i-1] gt $char[$i+1]) {
1756	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1757							}
1758							}
1759
1760							# make range regexp per length
1761	0					0	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
1762	0					0	my @regexp = ();
1763
1764							# is first and last
1765	0	0	0			0	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		0	0
		0
		0
1766	0					0	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
1767							}
1768
1769							# is first
1770							elsif ($length == CORE::length($char[$i-1])) {
1771	0					0	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
1772							}
1773
1774							# is inside in first and last
1775							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
1776	0					0	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
1777							}
1778
1779							# is last
1780							elsif ($length == CORE::length($char[$i+1])) {
1781	0					0	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
1782							}
1783
1784							else {
1785	0					0	die __FILE__, ": subroutine make_regexp panic.";
1786							}
1787
1788	0	0				0	if ($length == 1) {
1789	0					0	push @singleoctet, @regexp;
1790							}
1791							else {
1792	0					0	push @multipleoctet, @regexp;
1793							}
1794							}
1795
1796	0					0	$i += 2;
1797							}
1798
1799							# with /i modifier
1800							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1801	0	0				0	if ($modifier =~ /i/oxms) {
1802	0					0	my $uc = Char::Ekoi8r::uc($char[$i]);
1803	0					0	my $fc = Char::Ekoi8r::fc($char[$i]);
1804	0	0				0	if ($uc ne $fc) {
1805	0	0				0	if (CORE::length($fc) == 1) {
1806	0					0	push @singleoctet, $uc, $fc;
1807							}
1808							else {
1809	0					0	push @singleoctet, $uc;
1810	0					0	push @multipleoctet, $fc;
1811							}
1812							}
1813							else {
1814	0					0	push @singleoctet, $char[$i];
1815							}
1816							}
1817							else {
1818	0					0	push @singleoctet, $char[$i];
1819							}
1820	0					0	$i += 1;
1821							}
1822
1823							# single character of single octet code
1824							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
1825	0					0	push @singleoctet, "\t", "\x20";
1826	0					0	$i += 1;
1827							}
1828							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
1829	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
1830	0					0	$i += 1;
1831							}
1832							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
1833	0					0	push @singleoctet, $char[$i];
1834	0					0	$i += 1;
1835							}
1836
1837							# single character of multiple-octet code
1838							else {
1839	0					0	push @multipleoctet, $char[$i];
1840	0					0	$i += 1;
1841							}
1842							}
1843
1844							# quote metachar
1845	0					0	for (@singleoctet) {
1846	0	0				0	if ($_ eq '...') {
		0
		0
		0
		0
1847	0					0	$_ = '-';
1848							}
1849							elsif (/\A \n \z/oxms) {
1850	0					0	$_ = '\n';
1851							}
1852							elsif (/\A \r \z/oxms) {
1853	0					0	$_ = '\r';
1854							}
1855							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
1856	0					0	$_ = sprintf('\x%02X', CORE::ord $1);
1857							}
1858							elsif (/\A [\x00-\xFF] \z/oxms) {
1859	0					0	$_ = quotemeta $_;
1860							}
1861							}
1862
1863							# return character list
1864	0					0	return \@singleoctet, \@multipleoctet;
1865							}
1866
1867							#
1868							# KOI8-R octal escape sequence
1869							#
1870							sub octchr {
1871	0			0	0	0	my($octdigit) = @_;
1872
1873	0					0	my @binary = ();
1874	0					0	for my $octal (split(//,$octdigit)) {
1875	0					0	push @binary, {
1876							'0' => '000',
1877							'1' => '001',
1878							'2' => '010',
1879							'3' => '011',
1880							'4' => '100',
1881							'5' => '101',
1882							'6' => '110',
1883							'7' => '111',
1884							}->{$octal};
1885							}
1886	0					0	my $binary = join '', @binary;
1887
1888	0					0	my $octchr = {
1889							# 1234567
1890							1 => pack('B*', "0000000$binary"),
1891							2 => pack('B*', "000000$binary"),
1892							3 => pack('B*', "00000$binary"),
1893							4 => pack('B*', "0000$binary"),
1894							5 => pack('B*', "000$binary"),
1895							6 => pack('B*', "00$binary"),
1896							7 => pack('B*', "0$binary"),
1897							0 => pack('B*', "$binary"),
1898
1899							}->{CORE::length($binary) % 8};
1900
1901	0					0	return $octchr;
1902							}
1903
1904							#
1905							# KOI8-R hexadecimal escape sequence
1906							#
1907							sub hexchr {
1908	0			0	0	0	my($hexdigit) = @_;
1909
1910	0					0	my $hexchr = {
1911							1 => pack('H*', "0$hexdigit"),
1912							0 => pack('H*', "$hexdigit"),
1913
1914							}->{CORE::length($_[0]) % 2};
1915
1916	0					0	return $hexchr;
1917							}
1918
1919							#
1920							# KOI8-R open character list for qr
1921							#
1922							sub charlist_qr {
1923
1924	0			0	0	0	my $modifier = pop @_;
1925	0					0	my @char = @_;
1926
1927	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1928	0					0	my @singleoctet = @$singleoctet;
1929	0					0	my @multipleoctet = @$multipleoctet;
1930
1931							# return character list
1932	0	0				0	if (scalar(@singleoctet) >= 1) {
1933
1934							# with /i modifier
1935	0	0				0	if ($modifier =~ m/i/oxms) {
1936	0					0	my %singleoctet_ignorecase = ();
1937	0					0	for (@singleoctet) {
1938	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
1939	0					0	for my $ord (hex($1) .. hex($2)) {
1940	0					0	my $char = CORE::chr($ord);
1941	0					0	my $uc = Char::Ekoi8r::uc($char);
1942	0					0	my $fc = Char::Ekoi8r::fc($char);
1943	0	0				0	if ($uc eq $fc) {
1944	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
1945							}
1946							else {
1947	0	0				0	if (CORE::length($fc) == 1) {
1948	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1949	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
1950							}
1951							else {
1952	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1953	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
1954							}
1955							}
1956							}
1957							}
1958	0	0				0	if ($_ ne '') {
1959	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
1960							}
1961							}
1962	0					0	my $i = 0;
1963	0					0	my @singleoctet_ignorecase = ();
1964	0					0	for my $ord (0 .. 255) {
1965	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
1966	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
1967							}
1968							else {
1969	0					0	$i++;
1970							}
1971							}
1972	0					0	@singleoctet = ();
1973	0					0	for my $range (@singleoctet_ignorecase) {
1974	0	0				0	if (ref $range) {
1975	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
1976	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
1977							}
1978							elsif (scalar(@{$range}) == 2) {
1979	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1980							}
1981							else {
1982	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1983							}
1984							}
1985							}
1986							}
1987
1988	0					0	my $not_anchor = '';
1989
1990	0					0	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
1991							}
1992	0	0				0	if (scalar(@multipleoctet) >= 2) {
1993	0					0	return '(?:' . join('\|', @multipleoctet) . ')';
1994							}
1995							else {
1996	0					0	return $multipleoctet[0];
1997							}
1998							}
1999
2000							#
2001							# KOI8-R open character list for not qr
2002							#
2003							sub charlist_not_qr {
2004
2005	0			0	0	0	my $modifier = pop @_;
2006	0					0	my @char = @_;
2007
2008	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
2009	0					0	my @singleoctet = @$singleoctet;
2010	0					0	my @multipleoctet = @$multipleoctet;
2011
2012							# with /i modifier
2013	0	0				0	if ($modifier =~ m/i/oxms) {
2014	0					0	my %singleoctet_ignorecase = ();
2015	0					0	for (@singleoctet) {
2016	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
2017	0					0	for my $ord (hex($1) .. hex($2)) {
2018	0					0	my $char = CORE::chr($ord);
2019	0					0	my $uc = Char::Ekoi8r::uc($char);
2020	0					0	my $fc = Char::Ekoi8r::fc($char);
2021	0	0				0	if ($uc eq $fc) {
2022	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
2023							}
2024							else {
2025	0	0				0	if (CORE::length($fc) == 1) {
2026	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2027	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
2028							}
2029							else {
2030	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2031	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
2032							}
2033							}
2034							}
2035							}
2036	0	0				0	if ($_ ne '') {
2037	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
2038							}
2039							}
2040	0					0	my $i = 0;
2041	0					0	my @singleoctet_ignorecase = ();
2042	0					0	for my $ord (0 .. 255) {
2043	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
2044	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
2045							}
2046							else {
2047	0					0	$i++;
2048							}
2049							}
2050	0					0	@singleoctet = ();
2051	0					0	for my $range (@singleoctet_ignorecase) {
2052	0	0				0	if (ref $range) {
2053	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
2054	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
2055							}
2056							elsif (scalar(@{$range}) == 2) {
2057	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2058							}
2059							else {
2060	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2061							}
2062							}
2063							}
2064							}
2065
2066							# return character list
2067	0	0				0	if (scalar(@multipleoctet) >= 1) {
2068	0	0				0	if (scalar(@singleoctet) >= 1) {
2069
2070							# any character other than multiple-octet and single octet character class
2071	0					0	return '(?!' . join('\|', @multipleoctet) . ')(?:[^' . join('', @singleoctet) . '])';
2072							}
2073							else {
2074
2075							# any character other than multiple-octet character class
2076	0					0	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
2077							}
2078							}
2079							else {
2080	0	0				0	if (scalar(@singleoctet) >= 1) {
2081
2082							# any character other than single octet character class
2083	0					0	return '(?:[^' . join('', @singleoctet) . '])';
2084							}
2085							else {
2086
2087							# any character
2088	0					0	return "(?:$your_char)";
2089							}
2090							}
2091							}
2092
2093							#
2094							# open file in read mode
2095							#
2096							sub _open_r {
2097	177			177		655	my(undef,$file) = @_;
2098	177					979	$file =~ s#\A (\s) #./$1#oxms;
2099	177		33			16254	return eval(q{open($_[0],'<',$_[1])}) \|\|
2100							open($_[0],"< $file\0");
2101							}
2102
2103							#
2104							# open file in write mode
2105							#
2106							sub _open_w {
2107	0			0		0	my(undef,$file) = @_;
2108	0					0	$file =~ s#\A (\s) #./$1#oxms;
2109	0		0			0	return eval(q{open($_[0],'>',$_[1])}) \|\|
2110							open($_[0],"> $file\0");
2111							}
2112
2113							#
2114							# open file in append mode
2115							#
2116							sub _open_a {
2117	0			0		0	my(undef,$file) = @_;
2118	0					0	$file =~ s#\A (\s) #./$1#oxms;
2119	0		0			0	return eval(q{open($_[0],'>>',$_[1])}) \|\|
2120							open($_[0],">> $file\0");
2121							}
2122
2123							#
2124							# safe system
2125							#
2126							sub _systemx {
2127
2128							# P.707 29.2.33. exec
2129							# in Chapter 29: Functions
2130							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2131							#
2132							# Be aware that in older releases of Perl, exec (and system) did not flush
2133							# your output buffer, so you needed to enable command buffering by setting $\|
2134							# on one or more filehandles to avoid lost output in the case of exec, or
2135							# misordererd output in the case of system. This situation was largely remedied
2136							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
2137
2138							# P.855 exec
2139							# in Chapter 27: Functions
2140							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2141							#
2142							# In very old release of Perl (before v5.6), exec (and system) did not flush
2143							# your output buffer, so you needed to enable command buffering by setting $\|
2144							# on one or more filehandles to avoid lost output with exec or misordered
2145							# output with system.
2146
2147	177			177		760	$\| = 1;
2148
2149							# P.565 23.1.2. Cleaning Up Your Environment
2150							# in Chapter 23: Security
2151							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2152
2153							# P.656 Cleaning Up Your Environment
2154							# in Chapter 20: Security
2155							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2156
2157							# local $ENV{'PATH'} = '.';
2158	177					1884	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
2159
2160							# P.707 29.2.33. exec
2161							# in Chapter 29: Functions
2162							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2163							#
2164							# As we mentioned earlier, exec treats a discrete list of arguments as an
2165							# indication that it should bypass shell processing. However, there is one
2166							# place where you might still get tripped up. The exec call (and system, too)
2167							# will not distinguish between a single scalar argument and an array containing
2168							# only one element.
2169							#
2170							# @args = ("echo surprise"); # just one element in list
2171							# exec @args # still subject to shell escapes
2172							# or die "exec: $!"; # because @args == 1
2173							#
2174							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
2175							# first argument as the pathname, which forces the rest of the arguments to be
2176							# interpreted as a list, even if there is only one of them:
2177							#
2178							# exec { $args[0] } @args # safe even with one-argument list
2179							# or die "can't exec @args: $!";
2180
2181							# P.855 exec
2182							# in Chapter 27: Functions
2183							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2184							#
2185							# As we mentioned earlier, exec treats a discrete list of arguments as a
2186							# directive to bypass shell processing. However, there is one place where
2187							# you might still get tripped up. The exec call (and system, too) cannot
2188							# distinguish between a single scalar argument and an array containing
2189							# only one element.
2190							#
2191							# @args = ("echo surprise"); # just one element in list
2192							# exec @args # still subject to shell escapes
2193							# \|\| die "exec: $!"; # because @args == 1
2194							#
2195							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
2196							# argument as the pathname, which forces the rest of the arguments to be
2197							# interpreted as a list, even if there is only one of them:
2198							#
2199							# exec { $args[0] } @args # safe even with one-argument list
2200							# \|\| die "can't exec @args: $!";
2201
2202	177					431	return CORE::system { $_[0] } @_; # safe even with one-argument list
	177					11230657
2203							}
2204
2205							#
2206							# KOI8-R order to character (with parameter)
2207							#
2208							sub Char::Ekoi8r::chr(;$) {
2209
2210	0	0		0	0		my $c = @_ ? $_[0] : $_;
2211
2212	0	0					if ($c == 0x00) {
2213	0						return "\x00";
2214							}
2215							else {
2216	0						my @chr = ();
2217	0						while ($c > 0) {
2218	0						unshift @chr, ($c % 0x100);
2219	0						$c = int($c / 0x100);
2220							}
2221	0						return pack 'C*', @chr;
2222							}
2223							}
2224
2225							#
2226							# KOI8-R order to character (without parameter)
2227							#
2228							sub Char::Ekoi8r::chr_() {
2229
2230	0			0	0		my $c = $_;
2231
2232	0	0					if ($c == 0x00) {
2233	0						return "\x00";
2234							}
2235							else {
2236	0						my @chr = ();
2237	0						while ($c > 0) {
2238	0						unshift @chr, ($c % 0x100);
2239	0						$c = int($c / 0x100);
2240							}
2241	0						return pack 'C*', @chr;
2242							}
2243							}
2244
2245							#
2246							# KOI8-R path globbing (with parameter)
2247							#
2248							sub Char::Ekoi8r::glob($) {
2249
2250	0	0		0	0		if (wantarray) {
2251	0						my @glob = _DOS_like_glob(@_);
2252	0						for my $glob (@glob) {
2253	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2254							}
2255	0						return @glob;
2256							}
2257							else {
2258	0						my $glob = _DOS_like_glob(@_);
2259	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2260	0						return $glob;
2261							}
2262							}
2263
2264							#
2265							# KOI8-R path globbing (without parameter)
2266							#
2267							sub Char::Ekoi8r::glob_() {
2268
2269	0	0		0	0		if (wantarray) {
2270	0						my @glob = _DOS_like_glob();
2271	0						for my $glob (@glob) {
2272	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2273							}
2274	0						return @glob;
2275							}
2276							else {
2277	0						my $glob = _DOS_like_glob();
2278	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2279	0						return $glob;
2280							}
2281							}
2282
2283							#
2284							# KOI8-R path globbing via File::DosGlob 1.10
2285							#
2286							# Often I confuse "_dosglob" and "_doglob".
2287							# So, I renamed "_dosglob" to "_DOS_like_glob".
2288							#
2289							my %iter;
2290							my %entries;
2291							sub _DOS_like_glob {
2292
2293							# context (keyed by second cxix argument provided by core)
2294	0			0			my($expr,$cxix) = @_;
2295
2296							# glob without args defaults to $_
2297	0	0					$expr = $_ if not defined $expr;
2298
2299							# represents the current user's home directory
2300							#
2301							# 7.3. Expanding Tildes in Filenames
2302							# in Chapter 7. File Access
2303							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2304							#
2305							# and File::HomeDir, File::HomeDir::Windows module
2306
2307							# DOS-like system
2308	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2309	0						$expr =~ s{ \A ~ (?= [^/\\] ) }
2310	0						{ my_home_MSWin32() }oxmse;
2311							}
2312
2313							# UNIX-like system
2314							else {
2315	0						$expr =~ s{ \A ~ ( (?:[^/])* ) }
2316	0	0	0				{ $1 ? (eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
2317							}
2318
2319							# assume global context if not provided one
2320	0	0					$cxix = '_G_' if not defined $cxix;
2321	0	0					$iter{$cxix} = 0 if not exists $iter{$cxix};
2322
2323							# if we're just beginning, do it all first
2324	0	0					if ($iter{$cxix} == 0) {
2325	0						$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
2326							}
2327
2328							# chuck it all out, quick or slow
2329	0	0					if (wantarray) {
2330	0						delete $iter{$cxix};
2331	0						return @{delete $entries{$cxix}};
	0
2332							}
2333							else {
2334	0	0					if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0
2335	0						return shift @{$entries{$cxix}};
	0
2336							}
2337							else {
2338							# return undef for EOL
2339	0						delete $iter{$cxix};
2340	0						delete $entries{$cxix};
2341	0						return undef;
2342							}
2343							}
2344							}
2345
2346							#
2347							# KOI8-R path globbing subroutine
2348							#
2349							sub _do_glob {
2350
2351	0			0			my($cond,@expr) = @_;
2352	0						my @glob = ();
2353	0						my $fix_drive_relative_paths = 0;
2354
2355							OUTER:
2356	0						for my $expr (@expr) {
2357	0	0					next OUTER if not defined $expr;
2358	0	0					next OUTER if $expr eq '';
2359
2360	0						my @matched = ();
2361	0						my @globdir = ();
2362	0						my $head = '.';
2363	0						my $pathsep = '/';
2364	0						my $tail;
2365
2366							# if argument is within quotes strip em and do no globbing
2367	0	0					if ($expr =~ /\A " ((?:$q_char)*) " \z/oxms) {
2368	0						$expr = $1;
2369	0	0					if ($cond eq 'd') {
2370	0	0					if (-d $expr) {
2371	0						push @glob, $expr;
2372							}
2373							}
2374							else {
2375	0	0					if (-e $expr) {
2376	0						push @glob, $expr;
2377							}
2378							}
2379	0						next OUTER;
2380							}
2381
2382							# wildcards with a drive prefix such as h:*.pm must be changed
2383							# to h:./*.pm to expand correctly
2384	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2385	0	0					if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
2386	0						$fix_drive_relative_paths = 1;
2387							}
2388							}
2389
2390	0	0					if (($head, $tail) = _parse_path($expr,$pathsep)) {
2391	0	0					if ($tail eq '') {
2392	0						push @glob, $expr;
2393	0						next OUTER;
2394							}
2395	0	0					if ($head =~ / \A (?:$q_char)? [?] /oxms) {
2396	0	0					if (@globdir = _do_glob('d', $head)) {
2397	0						push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0
2398	0						next OUTER;
2399							}
2400							}
2401	0	0	0				if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
2402	0						$head .= $pathsep;
2403							}
2404	0						$expr = $tail;
2405							}
2406
2407							# If file component has no wildcards, we can avoid opendir
2408	0	0					if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
2409	0	0					if ($head eq '.') {
2410	0						$head = '';
2411							}
2412	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2413	0						$head .= $pathsep;
2414							}
2415	0						$head .= $expr;
2416	0	0					if ($cond eq 'd') {
2417	0	0					if (-d $head) {
2418	0						push @glob, $head;
2419							}
2420							}
2421							else {
2422	0	0					if (-e $head) {
2423	0						push @glob, $head;
2424							}
2425							}
2426	0						next OUTER;
2427							}
2428	0	0					opendir(*DIR, $head) or next OUTER;
2429	0						my @leaf = readdir DIR;
2430	0						closedir DIR;
2431
2432	0	0					if ($head eq '.') {
2433	0						$head = '';
2434							}
2435	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2436	0						$head .= $pathsep;
2437							}
2438
2439	0						my $pattern = '';
2440	0						while ($expr =~ / \G ($q_char) /oxgc) {
2441	0						my $char = $1;
2442
2443							# 6.9. Matching Shell Globs as Regular Expressions
2444							# in Chapter 6. Pattern Matching
2445							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2446							# (and so on)
2447
2448	0	0					if ($char eq '*') {
		0
		0
2449	0						$pattern .= "(?:$your_char)*",
2450							}
2451							elsif ($char eq '?') {
2452	0						$pattern .= "(?:$your_char)?", # DOS style
2453							# $pattern .= "(?:$your_char)", # UNIX style
2454							}
2455							elsif ((my $fc = Char::Ekoi8r::fc($char)) ne $char) {
2456	0						$pattern .= $fc;
2457							}
2458							else {
2459	0						$pattern .= quotemeta $char;
2460							}
2461							}
2462	0			0			my $matchsub = sub { Char::Ekoi8r::fc($_[0]) =~ /\A $pattern \z/xms };
	0
2463
2464							# if ($@) {
2465							# print STDERR "$0: $@\n";
2466							# next OUTER;
2467							# }
2468
2469							INNER:
2470	0						for my $leaf (@leaf) {
2471	0	0	0				if ($leaf eq '.' or $leaf eq '..') {
2472	0						next INNER;
2473							}
2474	0	0	0				if ($cond eq 'd' and not -d "$head$leaf") {
2475	0						next INNER;
2476							}
2477
2478	0	0					if (&$matchsub($leaf)) {
2479	0						push @matched, "$head$leaf";
2480	0						next INNER;
2481							}
2482
2483							# [DOS compatibility special case]
2484							# Failed, add a trailing dot and try again, but only...
2485
2486	0	0	0				if (Char::Ekoi8r::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
2487							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
2488							Char::Ekoi8r::index($pattern,'\\.') != -1 # pattern has a dot.
2489							) {
2490	0	0					if (&$matchsub("$leaf.")) {
2491	0						push @matched, "$head$leaf";
2492	0						next INNER;
2493							}
2494							}
2495							}
2496	0	0					if (@matched) {
2497	0						push @glob, @matched;
2498							}
2499							}
2500	0	0					if ($fix_drive_relative_paths) {
2501	0						for my $glob (@glob) {
2502	0						$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
2503							}
2504							}
2505	0						return @glob;
2506							}
2507
2508							#
2509							# KOI8-R parse line
2510							#
2511							sub _parse_line {
2512
2513	0			0			my($line) = @_;
2514
2515	0						$line .= ' ';
2516	0						my @piece = ();
2517	0						while ($line =~ /
2518							" ( (?: [^"] )* ) " \s+ \|
2519							( (?: [^"\s] )* ) \s+
2520							/oxmsg
2521							) {
2522	0	0					push @piece, defined($1) ? $1 : $2;
2523							}
2524	0						return @piece;
2525							}
2526
2527							#
2528							# KOI8-R parse path
2529							#
2530							sub _parse_path {
2531
2532	0			0			my($path,$pathsep) = @_;
2533
2534	0						$path .= '/';
2535	0						my @subpath = ();
2536	0						while ($path =~ /
2537							((?: [^\/\\] )+?) [\/\\]
2538							/oxmsg
2539							) {
2540	0						push @subpath, $1;
2541							}
2542
2543	0						my $tail = pop @subpath;
2544	0						my $head = join $pathsep, @subpath;
2545	0						return $head, $tail;
2546							}
2547
2548							#
2549							# via File::HomeDir::Windows 1.00
2550							#
2551							sub my_home_MSWin32 {
2552
2553							# A lot of unix people and unix-derived tools rely on
2554							# the ability to overload HOME. We will support it too
2555							# so that they can replace raw HOME calls with File::HomeDir.
2556	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
2557	0						return $ENV{'HOME'};
2558							}
2559
2560							# Do we have a user profile?
2561							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
2562	0						return $ENV{'USERPROFILE'};
2563							}
2564
2565							# Some Windows use something like $ENV{'HOME'}
2566							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
2567	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
2568							}
2569
2570	0						return undef;
2571							}
2572
2573							#
2574							# via File::HomeDir::Unix 1.00
2575							#
2576							sub my_home {
2577	0			0	0		my $home;
2578
2579	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
2580	0						$home = $ENV{'HOME'};
2581							}
2582
2583							# This is from the original code, but I'm guessing
2584							# it means "login directory" and exists on some Unixes.
2585							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
2586	0						$home = $ENV{'LOGDIR'};
2587							}
2588
2589							### More-desperate methods
2590
2591							# Light desperation on any (Unixish) platform
2592							else {
2593	0						$home = eval q{ (getpwuid($<))[7] };
2594							}
2595
2596							# On Unix in general, a non-existant home means "no home"
2597							# For example, "nobody"-like users might use /nonexistant
2598	0	0	0				if (defined $home and ! -d($home)) {
2599	0						$home = undef;
2600							}
2601	0						return $home;
2602							}
2603
2604							#
2605							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
2606							#
2607							sub Char::Ekoi8r::PREMATCH {
2608	0			0	0		return $`;
2609							}
2610
2611							#
2612							# ${^MATCH}, $MATCH, $& the string that matched
2613							#
2614							sub Char::Ekoi8r::MATCH {
2615	0			0	0		return $&;
2616							}
2617
2618							#
2619							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
2620							#
2621							sub Char::Ekoi8r::POSTMATCH {
2622	0			0	0		return $';
2623							}
2624
2625							#
2626							# KOI8-R character to order (with parameter)
2627							#
2628							sub Char::KOI8R::ord(;$) {
2629
2630	0	0		0	1		local $_ = shift if @_;
2631
2632	0	0					if (/\A ($q_char) /oxms) {
2633	0						my @ord = unpack 'C*', $1;
2634	0						my $ord = 0;
2635	0						while (my $o = shift @ord) {
2636	0						$ord = $ord * 0x100 + $o;
2637							}
2638	0						return $ord;
2639							}
2640							else {
2641	0						return CORE::ord $_;
2642							}
2643							}
2644
2645							#
2646							# KOI8-R character to order (without parameter)
2647							#
2648							sub Char::KOI8R::ord_() {
2649
2650	0	0		0	0		if (/\A ($q_char) /oxms) {
2651	0						my @ord = unpack 'C*', $1;
2652	0						my $ord = 0;
2653	0						while (my $o = shift @ord) {
2654	0						$ord = $ord * 0x100 + $o;
2655							}
2656	0						return $ord;
2657							}
2658							else {
2659	0						return CORE::ord $_;
2660							}
2661							}
2662
2663							#
2664							# KOI8-R reverse
2665							#
2666							sub Char::KOI8R::reverse(@) {
2667
2668	0	0		0	0		if (wantarray) {
2669	0						return CORE::reverse @_;
2670							}
2671							else {
2672
2673							# One of us once cornered Larry in an elevator and asked him what
2674							# problem he was solving with this, but he looked as far off into
2675							# the distance as he could in an elevator and said, "It seemed like
2676							# a good idea at the time."
2677
2678	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
2679							}
2680							}
2681
2682							#
2683							# KOI8-R getc (with parameter, without parameter)
2684							#
2685							sub Char::KOI8R::getc(;*@) {
2686
2687	0			0	0		my($package) = caller;
2688	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
2689	0	0	0				croak 'Too many arguments for Char::KOI8R::getc' if @_ and not wantarray;
2690
2691	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
2692	0						my $getc = '';
2693	0						for my $length ($length[0] .. $length[-1]) {
2694	0						$getc .= CORE::getc($fh);
2695	0	0					if (exists $range_tr{CORE::length($getc)}) {
2696	0	0					if ($getc =~ /\A ${Char::Ekoi8r::dot_s} \z/oxms) {
2697	0	0					return wantarray ? ($getc,@_) : $getc;
2698							}
2699							}
2700							}
2701	0	0					return wantarray ? ($getc,@_) : $getc;
2702							}
2703
2704							#
2705							# KOI8-R length by character
2706							#
2707							sub Char::KOI8R::length(;$) {
2708
2709	0	0		0	1		local $_ = shift if @_;
2710
2711	0						local @_ = /\G ($q_char) /oxmsg;
2712	0						return scalar @_;
2713							}
2714
2715							#
2716							# KOI8-R substr by character
2717							#
2718							BEGIN {
2719
2720							# P.232 The lvalue Attribute
2721							# in Chapter 6: Subroutines
2722							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2723
2724							# P.336 The lvalue Attribute
2725							# in Chapter 7: Subroutines
2726							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2727
2728							# P.144 8.4 Lvalue subroutines
2729							# in Chapter 8: perlsub: Perl subroutines
2730							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2731
2732	177	50	0	177	1	287954	eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
2733							# vv----------------*******
2734							sub Char::KOI8R::substr($$;$$) %s {
2735
2736							my @char = $_[0] =~ /\G ($q_char) /oxmsg;
2737
2738							# If the substring is beyond either end of the string, substr() returns the undefined
2739							# value and produces a warning. When used as an lvalue, specifying a substring that
2740							# is entirely outside the string raises an exception.
2741							# http://perldoc.perl.org/functions/substr.html
2742
2743							# A return with no argument returns the scalar value undef in scalar context,
2744							# an empty list () in list context, and (naturally) nothing at all in void
2745							# context.
2746
2747							my $offset = $_[1];
2748							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
2749							return;
2750							}
2751
2752							# substr($string,$offset,$length,$replacement)
2753							if (@_ == 4) {
2754							my(undef,undef,$length,$replacement) = @_;
2755							my $substr = join '', splice(@char, $offset, $length, $replacement);
2756							$_[0] = join '', @char;
2757
2758							# return $substr; this doesn't work, don't say "return"
2759							$substr;
2760							}
2761
2762							# substr($string,$offset,$length)
2763							elsif (@_ == 3) {
2764							my(undef,undef,$length) = @_;
2765							my $octet_offset = 0;
2766							my $octet_length = 0;
2767							if ($offset == 0) {
2768							$octet_offset = 0;
2769							}
2770							elsif ($offset > 0) {
2771							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2772							}
2773							else {
2774							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2775							}
2776							if ($length == 0) {
2777							$octet_length = 0;
2778							}
2779							elsif ($length > 0) {
2780							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
2781							}
2782							else {
2783							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
2784							}
2785							CORE::substr($_[0], $octet_offset, $octet_length);
2786							}
2787
2788							# substr($string,$offset)
2789							else {
2790							my $octet_offset = 0;
2791							if ($offset == 0) {
2792							$octet_offset = 0;
2793							}
2794							elsif ($offset > 0) {
2795							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2796							}
2797							else {
2798							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2799							}
2800							CORE::substr($_[0], $octet_offset);
2801							}
2802							}
2803							END
2804							}
2805
2806							#
2807							# KOI8-R index by character
2808							#
2809							sub Char::KOI8R::index($$;$) {
2810
2811	0			0	1		my $index;
2812	0	0					if (@_ == 3) {
2813	0						$index = Char::Ekoi8r::index($_[0], $_[1], CORE::length(Char::KOI8R::substr($_[0], 0, $_[2])));
2814							}
2815							else {
2816	0						$index = Char::Ekoi8r::index($_[0], $_[1]);
2817							}
2818
2819	0	0					if ($index == -1) {
2820	0						return -1;
2821							}
2822							else {
2823	0						return Char::KOI8R::length(CORE::substr $_[0], 0, $index);
2824							}
2825							}
2826
2827							#
2828							# KOI8-R rindex by character
2829							#
2830							sub Char::KOI8R::rindex($$;$) {
2831
2832	0			0	1		my $rindex;
2833	0	0					if (@_ == 3) {
2834	0						$rindex = Char::Ekoi8r::rindex($_[0], $_[1], CORE::length(Char::KOI8R::substr($_[0], 0, $_[2])));
2835							}
2836							else {
2837	0						$rindex = Char::Ekoi8r::rindex($_[0], $_[1]);
2838							}
2839
2840	0	0					if ($rindex == -1) {
2841	0						return -1;
2842							}
2843							else {
2844	0						return Char::KOI8R::length(CORE::substr $_[0], 0, $rindex);
2845							}
2846							}
2847
2848							#
2849							# instead of Carp::carp
2850							#
2851							sub carp {
2852	0			0	0		my($package,$filename,$line) = caller(1);
2853	0						print STDERR "@_ at $filename line $line.\n";
2854							}
2855
2856							#
2857							# instead of Carp::croak
2858							#
2859							sub croak {
2860	0			0	0		my($package,$filename,$line) = caller(1);
2861	0						print STDERR "@_ at $filename line $line.\n";
2862	0						die "\n";
2863							}
2864
2865							#
2866							# instead of Carp::cluck
2867							#
2868							sub cluck {
2869	0			0	0		my $i = 0;
2870	0						my @cluck = ();
2871	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2872	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
2873	0						$i++;
2874							}
2875	0						print STDERR CORE::reverse @cluck;
2876	0						print STDERR "\n";
2877	0						carp @_;
2878							}
2879
2880							#
2881							# instead of Carp::confess
2882							#
2883							sub confess {
2884	0			0	0		my $i = 0;
2885	0						my @confess = ();
2886	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2887	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
2888	0						$i++;
2889							}
2890	0						print STDERR CORE::reverse @confess;
2891	0						print STDERR "\n";
2892	0						croak @_;
2893							}
2894
2895							1;
2896
2897							__END__