File Coverage

Char/Ewindows1258.pm

Criterion	Covered	Total	%
statement	51	954	5.3
branch	4	562	0.7
condition	1	180	0.5
subroutine	20	85	23.5
pod	7	50	14.0
total	83	1831	4.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# This file is * FOR CPAN USE ONLY *.
3							#
4							package Char::Ewindows1258;
5							######################################################################
6							#
7							# Char::Ewindows1258 - Run-time routines for Char/Windows1258.pm
8							#
9							# http://search.cpan.org/dist/Char-Windows1258/
10							#
11							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 INABA Hitoshi
12							######################################################################
13
14	177			177		4739	use 5.00503; # Galapagos Consensus 1998 for primetools
	177					751
	177					9058
15							# use 5.008001; # Lancaster Consensus 2013 for toolchains
16
17							# 12.3. Delaying use Until Runtime
18							# in Chapter 12. Packages, Libraries, and Modules
19							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
20							# (and so on)
21
22	177			177		14638	BEGIN { eval q{ use vars qw($VERSION) } }
	177			177		1209
	177					428
	177					40304
23							$VERSION = sprintf '%d.%02d', q$Revision: 1.00 $ =~ /(\d+)/xmsg;
24
25							BEGIN {
26	177	50		177		1263	if ($^X =~ / jperl /oxmsi) {
27	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)";
28							}
29	177					307	if (CORE::ord('A') == 193) {
30							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).";
31							}
32	177					33191	if (CORE::ord('A') != 0x41) {
33							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).";
34							}
35							}
36
37							BEGIN {
38
39							# instead of utf8.pm
40	177			177		15747	eval q{
	177			177		1269
	177			45		331
	177					37190
	45					7654
	64					11135
	68					13435
	56					10038
	62					13368
	59					10781
41							no warnings qw(redefine);
42							*utf8::upgrade = sub { CORE::length $_[0] };
43							*utf8::downgrade = sub { 1 };
44							*utf8::encode = sub { };
45							*utf8::decode = sub { 1 };
46							*utf8::is_utf8 = sub { };
47							*utf8::valid = sub { 1 };
48							};
49	177	50				129888	if ($@) {
50	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
51	0					0	*utf8::downgrade = sub { 1 };
	0					0
52	0					0	*utf8::encode = sub { };
	0					0
53	0					0	*utf8::decode = sub { 1 };
	0					0
54	0					0	*utf8::is_utf8 = sub { };
	0					0
55	0					0	*utf8::valid = sub { 1 };
	0					0
56							}
57							}
58
59							# instead of Symbol.pm
60							BEGIN {
61	177			177		437	my $genpkg = "Symbol::";
62	177					8977	my $genseq = 0;
63
64							sub gensym () {
65	0			0	0	0	my $name = "GEN" . $genseq++;
66
67							# here, no strict qw(refs); if strict.pm exists
68
69	0					0	my $ref = \*{$genpkg . $name};
	0					0
70	0					0	delete $$genpkg{$name};
71	0					0	return $ref;
72							}
73
74							sub qualify ($;$) {
75	0			0	0	0	my ($name) = @_;
76	0	0	0			0	if (!ref($name) && (Char::Ewindows1258::index($name, '::') == -1) && (Char::Ewindows1258::index($name, "'") == -1)) {
			0
77	0					0	my $pkg;
78	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
79
80							# Global names: special character, "^xyz", or other.
81	0	0	0			0	if ($name =~ /^(([^a-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
82							# RGS 2001-11-05 : translate leading ^X to control-char
83	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
84	0					0	$pkg = "main";
85							}
86							else {
87	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
88							}
89	0					0	$name = $pkg . "::" . $name;
90							}
91	0					0	return $name;
92							}
93
94							sub qualify_to_ref ($;$) {
95
96							# here, no strict qw(refs); if strict.pm exists
97
98	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
99							}
100							}
101
102							# Column: local $@
103							# in Chapter 9. Osaete okitai Perl no kiso
104							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
105							# (and so on)
106
107							# use strict; if strict.pm exists
108							BEGIN {
109	177	50		177		781	if (eval { local $@; CORE::require strict }) {
	177					419
	177					2896
110	177					27652	strict::->import;
111							}
112							}
113
114							# P.714 29.2.39. flock
115							# in Chapter 29: Functions
116							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
117
118							# P.863 flock
119							# in Chapter 27: Functions
120							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
121
122							sub LOCK_SH() {1}
123							sub LOCK_EX() {2}
124							sub LOCK_UN() {8}
125							sub LOCK_NB() {4}
126
127							# instead of Carp.pm
128							sub carp;
129							sub croak;
130							sub cluck;
131							sub confess;
132
133							my $your_char = q{[\x00-\xFF]};
134
135							# regexp of character
136	177			177		23619	BEGIN { eval q{ use vars qw($q_char) } }
	177			177		1304
	177					324
	177					16972
137							$q_char = qr/$your_char/oxms;
138
139							#
140							# Windows-1258 character range per length
141							#
142							my %range_tr = ();
143
144							#
145							# alias of encoding name
146							#
147	177			177		10705	BEGIN { eval q{ use vars qw($encoding_alias) } }
	177			177		937
	177					388
	177					489926
148
149							#
150							# Windows-1258 case conversion
151							#
152							my %lc = ();
153							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
154							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
155							my %uc = ();
156							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
157							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
158							my %fc = ();
159							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
160							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
161
162							if (0) {
163							}
164
165							elsif (__PACKAGE__ =~ / \b Ewindows1258 \z/oxms) {
166							%range_tr = (
167							1 => [ [0x00..0xFF],
168							],
169							);
170							$encoding_alias = qr/ \b (?: windows-?1258 ) \b /oxmsi;
171
172							%lc = (%lc,
173							"\x8C" => "\x9C", # LATIN LIGATURE OE
174							"\x9F" => "\xFF", # LATIN LETTER Y WITH DIAERESIS
175							"\xC0" => "\xE0", # LATIN LETTER A WITH GRAVE
176							"\xC1" => "\xE1", # LATIN LETTER A WITH ACUTE
177							"\xC2" => "\xE2", # LATIN LETTER A WITH CIRCUMFLEX
178							"\xC3" => "\xE3", # LATIN LETTER A WITH BREVE
179							"\xC4" => "\xE4", # LATIN LETTER A WITH DIAERESIS
180							"\xC5" => "\xE5", # LATIN LETTER A WITH RING ABOVE
181							"\xC6" => "\xE6", # LATIN LETTER AE
182							"\xC7" => "\xE7", # LATIN LETTER C WITH CEDILLA
183							"\xC8" => "\xE8", # LATIN LETTER E WITH GRAVE
184							"\xC9" => "\xE9", # LATIN LETTER E WITH ACUTE
185							"\xCA" => "\xEA", # LATIN LETTER E WITH CIRCUMFLEX
186							"\xCB" => "\xEB", # LATIN LETTER E WITH DIAERESIS
187							"\xCD" => "\xED", # LATIN LETTER I WITH ACUTE
188							"\xCE" => "\xEE", # LATIN LETTER I WITH CIRCUMFLEX
189							"\xCF" => "\xEF", # LATIN LETTER I WITH DIAERESIS
190							"\xD0" => "\xF0", # LATIN LETTER D WITH STROKE
191							"\xD1" => "\xF1", # LATIN LETTER N WITH TILDE
192							"\xD3" => "\xF3", # LATIN LETTER O WITH ACUTE
193							"\xD4" => "\xF4", # LATIN LETTER O WITH CIRCUMFLEX
194							"\xD5" => "\xF5", # LATIN LETTER O WITH HORN
195							"\xD6" => "\xF6", # LATIN LETTER O WITH DIAERESIS
196							"\xD8" => "\xF8", # LATIN LETTER O WITH STROKE
197							"\xD9" => "\xF9", # LATIN LETTER U WITH GRAVE
198							"\xDA" => "\xFA", # LATIN LETTER U WITH ACUTE
199							"\xDB" => "\xFB", # LATIN LETTER U WITH CIRCUMFLEX
200							"\xDC" => "\xFC", # LATIN LETTER U WITH DIAERESIS
201							"\xDD" => "\xFD", # LATIN LETTER U WITH HORN
202							);
203
204							%uc = (%uc,
205							"\x9C" => "\x8C", # LATIN LIGATURE OE
206							"\xFF" => "\x9F", # LATIN LETTER Y WITH DIAERESIS
207							"\xE0" => "\xC0", # LATIN LETTER A WITH GRAVE
208							"\xE1" => "\xC1", # LATIN LETTER A WITH ACUTE
209							"\xE2" => "\xC2", # LATIN LETTER A WITH CIRCUMFLEX
210							"\xE3" => "\xC3", # LATIN LETTER A WITH BREVE
211							"\xE4" => "\xC4", # LATIN LETTER A WITH DIAERESIS
212							"\xE5" => "\xC5", # LATIN LETTER A WITH RING ABOVE
213							"\xE6" => "\xC6", # LATIN LETTER AE
214							"\xE7" => "\xC7", # LATIN LETTER C WITH CEDILLA
215							"\xE8" => "\xC8", # LATIN LETTER E WITH GRAVE
216							"\xE9" => "\xC9", # LATIN LETTER E WITH ACUTE
217							"\xEA" => "\xCA", # LATIN LETTER E WITH CIRCUMFLEX
218							"\xEB" => "\xCB", # LATIN LETTER E WITH DIAERESIS
219							"\xED" => "\xCD", # LATIN LETTER I WITH ACUTE
220							"\xEE" => "\xCE", # LATIN LETTER I WITH CIRCUMFLEX
221							"\xEF" => "\xCF", # LATIN LETTER I WITH DIAERESIS
222							"\xF0" => "\xD0", # LATIN LETTER D WITH STROKE
223							"\xF1" => "\xD1", # LATIN LETTER N WITH TILDE
224							"\xF3" => "\xD3", # LATIN LETTER O WITH ACUTE
225							"\xF4" => "\xD4", # LATIN LETTER O WITH CIRCUMFLEX
226							"\xF5" => "\xD5", # LATIN LETTER O WITH HORN
227							"\xF6" => "\xD6", # LATIN LETTER O WITH DIAERESIS
228							"\xF8" => "\xD8", # LATIN LETTER O WITH STROKE
229							"\xF9" => "\xD9", # LATIN LETTER U WITH GRAVE
230							"\xFA" => "\xDA", # LATIN LETTER U WITH ACUTE
231							"\xFB" => "\xDB", # LATIN LETTER U WITH CIRCUMFLEX
232							"\xFC" => "\xDC", # LATIN LETTER U WITH DIAERESIS
233							"\xFD" => "\xDD", # LATIN LETTER U WITH HORN
234							);
235
236							%fc = (%fc,
237							"\x8C" => "\x9C", # LATIN CAPITAL LIGATURE OE --> LATIN SMALL LIGATURE OE
238							"\x9F" => "\xFF", # LATIN CAPITAL LETTER Y WITH DIAERESIS --> LATIN SMALL LETTER Y WITH DIAERESIS
239							"\xC0" => "\xE0", # LATIN CAPITAL LETTER A WITH GRAVE --> LATIN SMALL LETTER A WITH GRAVE
240							"\xC1" => "\xE1", # LATIN CAPITAL LETTER A WITH ACUTE --> LATIN SMALL LETTER A WITH ACUTE
241							"\xC2" => "\xE2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX --> LATIN SMALL LETTER A WITH CIRCUMFLEX
242							"\xC3" => "\xE3", # LATIN CAPITAL LETTER A WITH BREVE --> LATIN SMALL LETTER A WITH BREVE
243							"\xC4" => "\xE4", # LATIN CAPITAL LETTER A WITH DIAERESIS --> LATIN SMALL LETTER A WITH DIAERESIS
244							"\xC5" => "\xE5", # LATIN CAPITAL LETTER A WITH RING ABOVE --> LATIN SMALL LETTER A WITH RING ABOVE
245							"\xC6" => "\xE6", # LATIN CAPITAL LETTER AE --> LATIN SMALL LETTER AE
246							"\xC7" => "\xE7", # LATIN CAPITAL LETTER C WITH CEDILLA --> LATIN SMALL LETTER C WITH CEDILLA
247							"\xC8" => "\xE8", # LATIN CAPITAL LETTER E WITH GRAVE --> LATIN SMALL LETTER E WITH GRAVE
248							"\xC9" => "\xE9", # LATIN CAPITAL LETTER E WITH ACUTE --> LATIN SMALL LETTER E WITH ACUTE
249							"\xCA" => "\xEA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX --> LATIN SMALL LETTER E WITH CIRCUMFLEX
250							"\xCB" => "\xEB", # LATIN CAPITAL LETTER E WITH DIAERESIS --> LATIN SMALL LETTER E WITH DIAERESIS
251							"\xCD" => "\xED", # LATIN CAPITAL LETTER I WITH ACUTE --> LATIN SMALL LETTER I WITH ACUTE
252							"\xCE" => "\xEE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX --> LATIN SMALL LETTER I WITH CIRCUMFLEX
253							"\xCF" => "\xEF", # LATIN CAPITAL LETTER I WITH DIAERESIS --> LATIN SMALL LETTER I WITH DIAERESIS
254							"\xD0" => "\xF0", # LATIN CAPITAL LETTER D WITH STROKE --> LATIN SMALL LETTER D WITH STROKE
255							"\xD1" => "\xF1", # LATIN CAPITAL LETTER N WITH TILDE --> LATIN SMALL LETTER N WITH TILDE
256							"\xD3" => "\xF3", # LATIN CAPITAL LETTER O WITH ACUTE --> LATIN SMALL LETTER O WITH ACUTE
257							"\xD4" => "\xF4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX --> LATIN SMALL LETTER O WITH CIRCUMFLEX
258							"\xD5" => "\xF5", # LATIN CAPITAL LETTER O WITH HORN --> LATIN SMALL LETTER O WITH HORN
259							"\xD6" => "\xF6", # LATIN CAPITAL LETTER O WITH DIAERESIS --> LATIN SMALL LETTER O WITH DIAERESIS
260							"\xD8" => "\xF8", # LATIN CAPITAL LETTER O WITH STROKE --> LATIN SMALL LETTER O WITH STROKE
261							"\xD9" => "\xF9", # LATIN CAPITAL LETTER U WITH GRAVE --> LATIN SMALL LETTER U WITH GRAVE
262							"\xDA" => "\xFA", # LATIN CAPITAL LETTER U WITH ACUTE --> LATIN SMALL LETTER U WITH ACUTE
263							"\xDB" => "\xFB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX --> LATIN SMALL LETTER U WITH CIRCUMFLEX
264							"\xDC" => "\xFC", # LATIN CAPITAL LETTER U WITH DIAERESIS --> LATIN SMALL LETTER U WITH DIAERESIS
265							"\xDD" => "\xFD", # LATIN CAPITAL LETTER U WITH HORN --> LATIN SMALL LETTER U WITH HORN
266							"\xDF" => "\x73\x73", # LATIN SMALL LETTER SHARP S --> LATIN SMALL LETTER S, LATIN SMALL LETTER S
267							);
268							}
269
270							else {
271							croak "Don't know my package name '@{[__PACKAGE__]}'";
272							}
273
274							#
275							# @ARGV wildcard globbing
276							#
277							sub import {
278
279	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
280	0					0	my @argv = ();
281	0					0	for (@ARGV) {
282
283							# has space
284	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
285	0	0				0	if (my @glob = Char::Ewindows1258::glob(qq{"$_"})) {
286	0					0	push @argv, @glob;
287							}
288							else {
289	0					0	push @argv, $_;
290							}
291							}
292
293							# has wildcard metachar
294							elsif (/\A (?:$q_char)? [?] /oxms) {
295	0	0				0	if (my @glob = Char::Ewindows1258::glob($_)) {
296	0					0	push @argv, @glob;
297							}
298							else {
299	0					0	push @argv, $_;
300							}
301							}
302
303							# no wildcard globbing
304							else {
305	0					0	push @argv, $_;
306							}
307							}
308	0					0	@ARGV = @argv;
309							}
310							}
311
312							# P.230 Care with Prototypes
313							# in Chapter 6: Subroutines
314							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
315							#
316							# If you aren't careful, you can get yourself into trouble with prototypes.
317							# But if you are careful, you can do a lot of neat things with them. This is
318							# all very powerful, of course, and should only be used in moderation to make
319							# the world a better place.
320
321							# P.332 Care with Prototypes
322							# in Chapter 7: Subroutines
323							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
324							#
325							# If you aren't careful, you can get yourself into trouble with prototypes.
326							# But if you are careful, you can do a lot of neat things with them. This is
327							# all very powerful, of course, and should only be used in moderation to make
328							# the world a better place.
329
330							#
331							# Prototypes of subroutines
332							#
333	0			0		0	sub unimport {}
334							sub Char::Ewindows1258::split(;$$$);
335							sub Char::Ewindows1258::tr($$$$;$);
336							sub Char::Ewindows1258::chop(@);
337							sub Char::Ewindows1258::index($$;$);
338							sub Char::Ewindows1258::rindex($$;$);
339							sub Char::Ewindows1258::lcfirst(@);
340							sub Char::Ewindows1258::lcfirst_();
341							sub Char::Ewindows1258::lc(@);
342							sub Char::Ewindows1258::lc_();
343							sub Char::Ewindows1258::ucfirst(@);
344							sub Char::Ewindows1258::ucfirst_();
345							sub Char::Ewindows1258::uc(@);
346							sub Char::Ewindows1258::uc_();
347							sub Char::Ewindows1258::fc(@);
348							sub Char::Ewindows1258::fc_();
349							sub Char::Ewindows1258::ignorecase;
350							sub Char::Ewindows1258::classic_character_class;
351							sub Char::Ewindows1258::capture;
352							sub Char::Ewindows1258::chr(;$);
353							sub Char::Ewindows1258::chr_();
354							sub Char::Ewindows1258::glob($);
355							sub Char::Ewindows1258::glob_();
356
357							sub Char::Windows1258::ord(;$);
358							sub Char::Windows1258::ord_();
359							sub Char::Windows1258::reverse(@);
360							sub Char::Windows1258::getc(;*@);
361							sub Char::Windows1258::length(;$);
362							sub Char::Windows1258::substr($$;$$);
363							sub Char::Windows1258::index($$;$);
364							sub Char::Windows1258::rindex($$;$);
365
366							#
367							# Regexp work
368							#
369	177			177		29264	BEGIN { eval q{ use vars qw(
	177			177		1388
	177					364
	177					104553
370							$Char::Windows1258::re_a
371							$Char::Windows1258::re_t
372							$Char::Windows1258::re_n
373							$Char::Windows1258::re_r
374							) } }
375
376							#
377							# Character class
378							#
379	177			177		19054	BEGIN { eval q{ use vars qw(
	177			177		2079
	177					670
	177					3452598
380							$dot
381							$dot_s
382							$eD
383							$eS
384							$eW
385							$eH
386							$eV
387							$eR
388							$eN
389							$not_alnum
390							$not_alpha
391							$not_ascii
392							$not_blank
393							$not_cntrl
394							$not_digit
395							$not_graph
396							$not_lower
397							$not_lower_i
398							$not_print
399							$not_punct
400							$not_space
401							$not_upper
402							$not_upper_i
403							$not_word
404							$not_xdigit
405							$eb
406							$eB
407							) } }
408
409							${Char::Ewindows1258::dot} = qr{(?:[^\x0A])};
410							${Char::Ewindows1258::dot_s} = qr{(?:[\x00-\xFF])};
411							${Char::Ewindows1258::eD} = qr{(?:[^0-9])};
412
413							# Vertical tabs are now whitespace
414							# \s in a regex now matches a vertical tab in all circumstances.
415							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
416							# ${Char::Ewindows1258::eS} = qr{(?:[^\x09\x0A \x0C\x0D\x20])};
417							# ${Char::Ewindows1258::eS} = qr{(?:[^\x09\x0A\x0B\x0C\x0D\x20])};
418							${Char::Ewindows1258::eS} = qr{(?:[^\s])};
419
420							${Char::Ewindows1258::eW} = qr{(?:[^0-9A-Z_a-z])};
421							${Char::Ewindows1258::eH} = qr{(?:[^\x09\x20])};
422							${Char::Ewindows1258::eV} = qr{(?:[^\x0A\x0B\x0C\x0D])};
423							${Char::Ewindows1258::eR} = qr{(?:\x0D\x0A\|[\x0A\x0D])};
424							${Char::Ewindows1258::eN} = qr{(?:[^\x0A])};
425							${Char::Ewindows1258::not_alnum} = qr{(?:[^\x30-\x39\x41-\x5A\x61-\x7A])};
426							${Char::Ewindows1258::not_alpha} = qr{(?:[^\x41-\x5A\x61-\x7A])};
427							${Char::Ewindows1258::not_ascii} = qr{(?:[^\x00-\x7F])};
428							${Char::Ewindows1258::not_blank} = qr{(?:[^\x09\x20])};
429							${Char::Ewindows1258::not_cntrl} = qr{(?:[^\x00-\x1F\x7F])};
430							${Char::Ewindows1258::not_digit} = qr{(?:[^\x30-\x39])};
431							${Char::Ewindows1258::not_graph} = qr{(?:[^\x21-\x7F])};
432							${Char::Ewindows1258::not_lower} = qr{(?:[^\x61-\x7A])};
433							${Char::Ewindows1258::not_lower_i} = qr{(?:[\x00-\xFF])};
434							${Char::Ewindows1258::not_print} = qr{(?:[^\x20-\x7F])};
435							${Char::Ewindows1258::not_punct} = qr{(?:[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
436							${Char::Ewindows1258::not_space} = qr{(?:[^\s\x0B])};
437							${Char::Ewindows1258::not_upper} = qr{(?:[^\x41-\x5A])};
438							${Char::Ewindows1258::not_upper_i} = qr{(?:[\x00-\xFF])};
439							${Char::Ewindows1258::not_word} = qr{(?:[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
440							${Char::Ewindows1258::not_xdigit} = qr{(?:[^\x30-\x39\x41-\x46\x61-\x66])};
441							${Char::Ewindows1258::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
442							${Char::Ewindows1258::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
443
444							# avoid: Name "Char::Ewindows1258::foo" used only once: possible typo at here.
445							${Char::Ewindows1258::dot} = ${Char::Ewindows1258::dot};
446							${Char::Ewindows1258::dot_s} = ${Char::Ewindows1258::dot_s};
447							${Char::Ewindows1258::eD} = ${Char::Ewindows1258::eD};
448							${Char::Ewindows1258::eS} = ${Char::Ewindows1258::eS};
449							${Char::Ewindows1258::eW} = ${Char::Ewindows1258::eW};
450							${Char::Ewindows1258::eH} = ${Char::Ewindows1258::eH};
451							${Char::Ewindows1258::eV} = ${Char::Ewindows1258::eV};
452							${Char::Ewindows1258::eR} = ${Char::Ewindows1258::eR};
453							${Char::Ewindows1258::eN} = ${Char::Ewindows1258::eN};
454							${Char::Ewindows1258::not_alnum} = ${Char::Ewindows1258::not_alnum};
455							${Char::Ewindows1258::not_alpha} = ${Char::Ewindows1258::not_alpha};
456							${Char::Ewindows1258::not_ascii} = ${Char::Ewindows1258::not_ascii};
457							${Char::Ewindows1258::not_blank} = ${Char::Ewindows1258::not_blank};
458							${Char::Ewindows1258::not_cntrl} = ${Char::Ewindows1258::not_cntrl};
459							${Char::Ewindows1258::not_digit} = ${Char::Ewindows1258::not_digit};
460							${Char::Ewindows1258::not_graph} = ${Char::Ewindows1258::not_graph};
461							${Char::Ewindows1258::not_lower} = ${Char::Ewindows1258::not_lower};
462							${Char::Ewindows1258::not_lower_i} = ${Char::Ewindows1258::not_lower_i};
463							${Char::Ewindows1258::not_print} = ${Char::Ewindows1258::not_print};
464							${Char::Ewindows1258::not_punct} = ${Char::Ewindows1258::not_punct};
465							${Char::Ewindows1258::not_space} = ${Char::Ewindows1258::not_space};
466							${Char::Ewindows1258::not_upper} = ${Char::Ewindows1258::not_upper};
467							${Char::Ewindows1258::not_upper_i} = ${Char::Ewindows1258::not_upper_i};
468							${Char::Ewindows1258::not_word} = ${Char::Ewindows1258::not_word};
469							${Char::Ewindows1258::not_xdigit} = ${Char::Ewindows1258::not_xdigit};
470							${Char::Ewindows1258::eb} = ${Char::Ewindows1258::eb};
471							${Char::Ewindows1258::eB} = ${Char::Ewindows1258::eB};
472
473							#
474							# Windows-1258 split
475							#
476							sub Char::Ewindows1258::split(;$$$) {
477
478							# P.794 29.2.161. split
479							# in Chapter 29: Functions
480							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
481
482							# P.951 split
483							# in Chapter 27: Functions
484							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
485
486	0			0	0	0	my $pattern = $_[0];
487	0					0	my $string = $_[1];
488	0					0	my $limit = $_[2];
489
490							# if $pattern is also omitted or is the literal space, " "
491	0	0				0	if (not defined $pattern) {
492	0					0	$pattern = ' ';
493							}
494
495							# if $string is omitted, the function splits the $_ string
496	0	0				0	if (not defined $string) {
497	0	0				0	if (defined $_) {
498	0					0	$string = $_;
499							}
500							else {
501	0					0	$string = '';
502							}
503							}
504
505	0					0	my @split = ();
506
507							# when string is empty
508	0	0				0	if ($string eq '') {
		0
509
510							# resulting list value in list context
511	0	0				0	if (wantarray) {
512	0					0	return @split;
513							}
514
515							# count of substrings in scalar context
516							else {
517	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
518	0					0	@_ = @split;
519	0					0	return scalar @_;
520							}
521							}
522
523							# split's first argument is more consistently interpreted
524							#
525							# After some changes earlier in v5.17, split's behavior has been simplified:
526							# if the PATTERN argument evaluates to a string containing one space, it is
527							# treated the way that a literal string containing one space once was.
528							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
529
530							# if $pattern is also omitted or is the literal space, " ", the function splits
531							# on whitespace, /\s+/, after skipping any leading whitespace
532							# (and so on)
533
534							elsif ($pattern eq ' ') {
535	0	0				0	if (not defined $limit) {
536	0					0	return CORE::split(' ', $string);
537							}
538							else {
539	0					0	return CORE::split(' ', $string, $limit);
540							}
541							}
542
543							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
544	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
545
546							# a pattern capable of matching either the null string or something longer than the
547							# null string will split the value of $string into separate characters wherever it
548							# matches the null string between characters
549							# (and so on)
550
551	0	0				0	if ('' =~ / \A $pattern \z /xms) {
552	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
553	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
554
555							# P.1024 Appendix W.10 Multibyte Processing
556							# of ISBN 1-56592-224-7 CJKV Information Processing
557							# (and so on)
558
559							# the //m modifier is assumed when you split on the pattern /^/
560							# (and so on)
561
562							# V
563	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
564
565							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
566							# is included in the resulting list, interspersed with the fields that are ordinarily returned
567							# (and so on)
568
569	0					0	local $@;
570	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
571	0					0	push @split, eval('$' . $digit);
572							}
573							}
574							}
575
576							else {
577	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
578
579							# V
580	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
581	0					0	local $@;
582	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
583	0					0	push @split, eval('$' . $digit);
584							}
585							}
586							}
587							}
588
589							elsif ($limit > 0) {
590	0	0				0	if ('' =~ / \A $pattern \z /xms) {
591	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
592	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
593
594							# V
595	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
596	0					0	local $@;
597	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
598	0					0	push @split, eval('$' . $digit);
599							}
600							}
601							}
602							}
603							else {
604	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
605	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
606
607							# V
608	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
609	0					0	local $@;
610	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
611	0					0	push @split, eval('$' . $digit);
612							}
613							}
614							}
615							}
616							}
617
618	0	0				0	if (CORE::length($string) > 0) {
619	0					0	push @split, $string;
620							}
621
622							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
623	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
624	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
625	0					0	pop @split;
626							}
627							}
628
629							# resulting list value in list context
630	0	0				0	if (wantarray) {
631	0					0	return @split;
632							}
633
634							# count of substrings in scalar context
635							else {
636	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
637	0					0	@_ = @split;
638	0					0	return scalar @_;
639							}
640							}
641
642							#
643							# get last subexpression offsets
644							#
645							sub _last_subexpression_offsets {
646	0			0		0	my $pattern = $_[0];
647
648							# remove comment
649	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
650
651	0					0	my $modifier = '';
652	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
653	0					0	$modifier = $1;
654	0					0	$modifier =~ s/-[A-Za-z]*//;
655							}
656
657							# with /x modifier
658	0					0	my @char = ();
659	0	0				0	if ($modifier =~ /x/oxms) {
660	0					0	@char = $pattern =~ /\G(
661							\\ (?:$q_char) \|
662							\# (?:$q_char)*? $ \|
663							\[ (?: \\\] \| (?:$q_char))+? \] \|
664							\(\? \|
665							(?:$q_char)
666							)/oxmsg;
667							}
668
669							# without /x modifier
670							else {
671	0					0	@char = $pattern =~ /\G(
672							\\ (?:$q_char) \|
673							\[ (?: \\\] \| (?:$q_char))+? \] \|
674							\(\? \|
675							(?:$q_char)
676							)/oxmsg;
677							}
678
679	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
680							}
681
682							#
683							# Windows-1258 transliteration (tr///)
684							#
685							sub Char::Ewindows1258::tr($$$$;$) {
686
687	0			0	0	0	my $bind_operator = $_[1];
688	0					0	my $searchlist = $_[2];
689	0					0	my $replacementlist = $_[3];
690	0		0			0	my $modifier = $_[4] \|\| '';
691
692	0	0				0	if ($modifier =~ /r/oxms) {
693	0	0				0	if ($bind_operator =~ / !~ /oxms) {
694	0					0	croak "Using !~ with tr///r doesn't make sense";
695							}
696							}
697
698	0					0	my @char = $_[0] =~ /\G ($q_char) /oxmsg;
699	0					0	my @searchlist = _charlist_tr($searchlist);
700	0					0	my @replacementlist = _charlist_tr($replacementlist);
701
702	0					0	my %tr = ();
703	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
704	0	0				0	if (not exists $tr{$searchlist[$i]}) {
705	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
706	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
707							}
708							elsif ($modifier =~ /d/oxms) {
709	0					0	$tr{$searchlist[$i]} = '';
710							}
711							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
712	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
713							}
714							else {
715	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
716							}
717							}
718							}
719
720	0					0	my $tr = 0;
721	0					0	my $replaced = '';
722	0	0				0	if ($modifier =~ /c/oxms) {
723	0					0	while (defined(my $char = shift @char)) {
724	0	0				0	if (not exists $tr{$char}) {
725	0	0				0	if (defined $replacementlist[0]) {
726	0					0	$replaced .= $replacementlist[0];
727							}
728	0					0	$tr++;
729	0	0				0	if ($modifier =~ /s/oxms) {
730	0		0			0	while (@char and (not exists $tr{$char[0]})) {
731	0					0	shift @char;
732	0					0	$tr++;
733							}
734							}
735							}
736							else {
737	0					0	$replaced .= $char;
738							}
739							}
740							}
741							else {
742	0					0	while (defined(my $char = shift @char)) {
743	0	0				0	if (exists $tr{$char}) {
744	0					0	$replaced .= $tr{$char};
745	0					0	$tr++;
746	0	0				0	if ($modifier =~ /s/oxms) {
747	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
748	0					0	shift @char;
749	0					0	$tr++;
750							}
751							}
752							}
753							else {
754	0					0	$replaced .= $char;
755							}
756							}
757							}
758
759	0	0				0	if ($modifier =~ /r/oxms) {
760	0					0	return $replaced;
761							}
762							else {
763	0					0	$_[0] = $replaced;
764	0	0				0	if ($bind_operator =~ / !~ /oxms) {
765	0					0	return not $tr;
766							}
767							else {
768	0					0	return $tr;
769							}
770							}
771							}
772
773							#
774							# Windows-1258 chop
775							#
776							sub Char::Ewindows1258::chop(@) {
777
778	0			0	0	0	my $chop;
779	0	0				0	if (@_ == 0) {
780	0					0	my @char = /\G ($q_char) /oxmsg;
781	0					0	$chop = pop @char;
782	0					0	$_ = join '', @char;
783							}
784							else {
785	0					0	for (@_) {
786	0					0	my @char = /\G ($q_char) /oxmsg;
787	0					0	$chop = pop @char;
788	0					0	$_ = join '', @char;
789							}
790							}
791	0					0	return $chop;
792							}
793
794							#
795							# Windows-1258 index by octet
796							#
797							sub Char::Ewindows1258::index($$;$) {
798
799	0			0	1	0	my($str,$substr,$position) = @_;
800	0		0			0	$position \|\|= 0;
801	0					0	my $pos = 0;
802
803	0					0	while ($pos < CORE::length($str)) {
804	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
805	0	0				0	if ($pos >= $position) {
806	0					0	return $pos;
807							}
808							}
809	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
810	0					0	$pos += CORE::length($1);
811							}
812							else {
813	0					0	$pos += 1;
814							}
815							}
816	0					0	return -1;
817							}
818
819							#
820							# Windows-1258 reverse index
821							#
822							sub Char::Ewindows1258::rindex($$;$) {
823
824	0			0	0	0	my($str,$substr,$position) = @_;
825	0		0			0	$position \|\|= CORE::length($str) - 1;
826	0					0	my $pos = 0;
827	0					0	my $rindex = -1;
828
829	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
830	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
831	0					0	$rindex = $pos;
832							}
833	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
834	0					0	$pos += CORE::length($1);
835							}
836							else {
837	0					0	$pos += 1;
838							}
839							}
840	0					0	return $rindex;
841							}
842
843							#
844							# Windows-1258 lower case first with parameter
845							#
846							sub Char::Ewindows1258::lcfirst(@) {
847	0	0		0	0	0	if (@_) {
848	0					0	my $s = shift @_;
849	0	0	0			0	if (@_ and wantarray) {
850	0					0	return Char::Ewindows1258::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
851							}
852							else {
853	0					0	return Char::Ewindows1258::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
854							}
855							}
856							else {
857	0					0	return Char::Ewindows1258::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
858							}
859							}
860
861							#
862							# Windows-1258 lower case first without parameter
863							#
864							sub Char::Ewindows1258::lcfirst_() {
865	0			0	0	0	return Char::Ewindows1258::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
866							}
867
868							#
869							# Windows-1258 lower case with parameter
870							#
871							sub Char::Ewindows1258::lc(@) {
872	0	0		0	0	0	if (@_) {
873	0					0	my $s = shift @_;
874	0	0	0			0	if (@_ and wantarray) {
875	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
876							}
877							else {
878	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
879							}
880							}
881							else {
882	0					0	return Char::Ewindows1258::lc_();
883							}
884							}
885
886							#
887							# Windows-1258 lower case without parameter
888							#
889							sub Char::Ewindows1258::lc_() {
890	0			0	0	0	my $s = $_;
891	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
892							}
893
894							#
895							# Windows-1258 upper case first with parameter
896							#
897							sub Char::Ewindows1258::ucfirst(@) {
898	0	0		0	0	0	if (@_) {
899	0					0	my $s = shift @_;
900	0	0	0			0	if (@_ and wantarray) {
901	0					0	return Char::Ewindows1258::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
902							}
903							else {
904	0					0	return Char::Ewindows1258::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
905							}
906							}
907							else {
908	0					0	return Char::Ewindows1258::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
909							}
910							}
911
912							#
913							# Windows-1258 upper case first without parameter
914							#
915							sub Char::Ewindows1258::ucfirst_() {
916	0			0	0	0	return Char::Ewindows1258::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
917							}
918
919							#
920							# Windows-1258 upper case with parameter
921							#
922							sub Char::Ewindows1258::uc(@) {
923	0	0		0	0	0	if (@_) {
924	0					0	my $s = shift @_;
925	0	0	0			0	if (@_ and wantarray) {
926	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
927							}
928							else {
929	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
930							}
931							}
932							else {
933	0					0	return Char::Ewindows1258::uc_();
934							}
935							}
936
937							#
938							# Windows-1258 upper case without parameter
939							#
940							sub Char::Ewindows1258::uc_() {
941	0			0	0	0	my $s = $_;
942	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
943							}
944
945							#
946							# Windows-1258 fold case with parameter
947							#
948							sub Char::Ewindows1258::fc(@) {
949	0	0		0	0	0	if (@_) {
950	0					0	my $s = shift @_;
951	0	0	0			0	if (@_ and wantarray) {
952	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
953							}
954							else {
955	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
956							}
957							}
958							else {
959	0					0	return Char::Ewindows1258::fc_();
960							}
961							}
962
963							#
964							# Windows-1258 fold case without parameter
965							#
966							sub Char::Ewindows1258::fc_() {
967	0			0	0	0	my $s = $_;
968	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
969							}
970
971							#
972							# Windows-1258 regexp capture
973							#
974							{
975							sub Char::Ewindows1258::capture {
976	0			0	1	0	return $_[0];
977							}
978							}
979
980							#
981							# Windows-1258 regexp ignore case modifier
982							#
983							sub Char::Ewindows1258::ignorecase {
984
985	0			0	0	0	my @string = @_;
986	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
987
988							# ignore case of $scalar or @array
989	0					0	for my $string (@string) {
990
991							# split regexp
992	0					0	my @char = $string =~ /\G(
993							\[\^ \|
994							\\? (?:$q_char)
995							)/oxmsg;
996
997							# unescape character
998	0					0	for (my $i=0; $i <= $#char; $i++) {
999	0	0				0	next if not defined $char[$i];
1000
1001							# open character class [...]
1002	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
1003	0					0	my $left = $i;
1004
1005							# [] make die "unmatched [] in regexp ..."
1006
1007	0	0				0	if ($char[$i+1] eq ']') {
1008	0					0	$i++;
1009							}
1010
1011	0					0	while (1) {
1012	0	0				0	if (++$i > $#char) {
1013	0					0	croak "Unmatched [] in regexp";
1014							}
1015	0	0				0	if ($char[$i] eq ']') {
1016	0					0	my $right = $i;
1017	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
1018
1019							# escape character
1020	0					0	for my $char (@charlist) {
1021	0	0				0	if (0) {
1022							}
1023
1024	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1025	0					0	$char = $1 . '\\' . $char;
1026							}
1027							}
1028
1029							# [...]
1030	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
1031
1032	0					0	$i = $left;
1033	0					0	last;
1034							}
1035							}
1036							}
1037
1038							# open character class [^...]
1039							elsif ($char[$i] eq '[^') {
1040	0					0	my $left = $i;
1041
1042							# [^] make die "unmatched [] in regexp ..."
1043
1044	0	0				0	if ($char[$i+1] eq ']') {
1045	0					0	$i++;
1046							}
1047
1048	0					0	while (1) {
1049	0	0				0	if (++$i > $#char) {
1050	0					0	croak "Unmatched [] in regexp";
1051							}
1052	0	0				0	if ($char[$i] eq ']') {
1053	0					0	my $right = $i;
1054	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
1055
1056							# escape character
1057	0					0	for my $char (@charlist) {
1058	0	0				0	if (0) {
1059							}
1060
1061	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1062	0					0	$char = '\\' . $char;
1063							}
1064							}
1065
1066							# [^...]
1067	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
1068
1069	0					0	$i = $left;
1070	0					0	last;
1071							}
1072							}
1073							}
1074
1075							# rewrite classic character class or escape character
1076							elsif (my $char = classic_character_class($char[$i])) {
1077	0					0	$char[$i] = $char;
1078							}
1079
1080							# with /i modifier
1081							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1082	0					0	my $uc = Char::Ewindows1258::uc($char[$i]);
1083	0					0	my $fc = Char::Ewindows1258::fc($char[$i]);
1084	0	0				0	if ($uc ne $fc) {
1085	0	0				0	if (CORE::length($fc) == 1) {
1086	0					0	$char[$i] = '[' . $uc . $fc . ']';
1087							}
1088							else {
1089	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
1090							}
1091							}
1092							}
1093							}
1094
1095							# characterize
1096	0					0	for (my $i=0; $i <= $#char; $i++) {
1097	0	0				0	next if not defined $char[$i];
1098
1099	0	0				0	if (0) {
1100							}
1101
1102							# quote character before ? + * {
1103	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
1104	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
1105	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
1106							}
1107							}
1108							}
1109
1110	0					0	$string = join '', @char;
1111							}
1112
1113							# make regexp string
1114	0					0	return @string;
1115							}
1116
1117							#
1118							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
1119							#
1120							sub Char::Ewindows1258::classic_character_class {
1121	0			0	0	0	my($char) = @_;
1122
1123							return {
1124	0		0			0	'\D' => '${Char::Ewindows1258::eD}',
1125							'\S' => '${Char::Ewindows1258::eS}',
1126							'\W' => '${Char::Ewindows1258::eW}',
1127							'\d' => '[0-9]',
1128
1129							# Before Perl 5.6, \s only matched the five whitespace characters
1130							# tab, newline, form-feed, carriage return, and the space character
1131							# itself, which, taken together, is the character class [\t\n\f\r ].
1132
1133							# Vertical tabs are now whitespace
1134							# \s in a regex now matches a vertical tab in all circumstances.
1135							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1136							# \t \n \v \f \r space
1137							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1138							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1139							'\s' => '\s',
1140
1141							'\w' => '[0-9A-Z_a-z]',
1142							'\C' => '[\x00-\xFF]',
1143							'\X' => 'X',
1144
1145							# \h \v \H \V
1146
1147							# P.114 Character Class Shortcuts
1148							# in Chapter 7: In the World of Regular Expressions
1149							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
1150
1151							# P.357 13.2.3 Whitespace
1152							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
1153							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1154							#
1155							# 0x00009 CHARACTER TABULATION h s
1156							# 0x0000a LINE FEED (LF) vs
1157							# 0x0000b LINE TABULATION v
1158							# 0x0000c FORM FEED (FF) vs
1159							# 0x0000d CARRIAGE RETURN (CR) vs
1160							# 0x00020 SPACE h s
1161
1162							# P.196 Table 5-9. Alphanumeric regex metasymbols
1163							# in Chapter 5. Pattern Matching
1164							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1165
1166							# (and so on)
1167
1168							'\H' => '${Char::Ewindows1258::eH}',
1169							'\V' => '${Char::Ewindows1258::eV}',
1170							'\h' => '[\x09\x20]',
1171							'\v' => '[\x0A\x0B\x0C\x0D]',
1172							'\R' => '${Char::Ewindows1258::eR}',
1173
1174							# \N
1175							#
1176							# http://perldoc.perl.org/perlre.html
1177							# Character Classes and other Special Escapes
1178							# Any character but \n (experimental). Not affected by /s modifier
1179
1180							'\N' => '${Char::Ewindows1258::eN}',
1181
1182							# \b \B
1183
1184							# P.180 Boundaries: The \b and \B Assertions
1185							# in Chapter 5: Pattern Matching
1186							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1187
1188							# P.219 Boundaries: The \b and \B Assertions
1189							# in Chapter 5: Pattern Matching
1190							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1191
1192							# '\b' => '(?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))',
1193							'\b' => '${Char::Ewindows1258::eb}',
1194
1195							# '\B' => '(?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))',
1196							'\B' => '${Char::Ewindows1258::eB}',
1197
1198							}->{$char} \|\| '';
1199							}
1200
1201							#
1202							# prepare Windows-1258 characters per length
1203							#
1204
1205							# 1 octet characters
1206							my @chars1 = ();
1207							sub chars1 {
1208	0	0		0	0	0	if (@chars1) {
1209	0					0	return @chars1;
1210							}
1211	0	0				0	if (exists $range_tr{1}) {
1212	0					0	my @ranges = @{ $range_tr{1} };
	0					0
1213	0					0	while (my @range = splice(@ranges,0,1)) {
1214	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1215	0					0	push @chars1, pack 'C', $oct0;
1216							}
1217							}
1218							}
1219	0					0	return @chars1;
1220							}
1221
1222							# 2 octets characters
1223							my @chars2 = ();
1224							sub chars2 {
1225	0	0		0	0	0	if (@chars2) {
1226	0					0	return @chars2;
1227							}
1228	0	0				0	if (exists $range_tr{2}) {
1229	0					0	my @ranges = @{ $range_tr{2} };
	0					0
1230	0					0	while (my @range = splice(@ranges,0,2)) {
1231	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1232	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1233	0					0	push @chars2, pack 'CC', $oct0,$oct1;
1234							}
1235							}
1236							}
1237							}
1238	0					0	return @chars2;
1239							}
1240
1241							# 3 octets characters
1242							my @chars3 = ();
1243							sub chars3 {
1244	0	0		0	0	0	if (@chars3) {
1245	0					0	return @chars3;
1246							}
1247	0	0				0	if (exists $range_tr{3}) {
1248	0					0	my @ranges = @{ $range_tr{3} };
	0					0
1249	0					0	while (my @range = splice(@ranges,0,3)) {
1250	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1251	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1252	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1253	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
1254							}
1255							}
1256							}
1257							}
1258							}
1259	0					0	return @chars3;
1260							}
1261
1262							# 4 octets characters
1263							my @chars4 = ();
1264							sub chars4 {
1265	0	0		0	0	0	if (@chars4) {
1266	0					0	return @chars4;
1267							}
1268	0	0				0	if (exists $range_tr{4}) {
1269	0					0	my @ranges = @{ $range_tr{4} };
	0					0
1270	0					0	while (my @range = splice(@ranges,0,4)) {
1271	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1272	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1273	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1274	0					0	for my $oct3 (@{$range[3]}) {
	0					0
1275	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
1276							}
1277							}
1278							}
1279							}
1280							}
1281							}
1282	0					0	return @chars4;
1283							}
1284
1285							#
1286							# Windows-1258 open character list for tr
1287							#
1288							sub _charlist_tr {
1289
1290	0			0		0	local $_ = shift @_;
1291
1292							# unescape character
1293	0					0	my @char = ();
1294	0					0	while (not /\G \z/oxmsgc) {
1295	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
1296	0					0	push @char, '\-';
1297							}
1298							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
1299	0					0	push @char, CORE::chr(oct $1);
1300							}
1301							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
1302	0					0	push @char, CORE::chr(hex $1);
1303							}
1304							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
1305	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
1306							}
1307							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
1308	0					0	push @char, {
1309							'\0' => "\0",
1310							'\n' => "\n",
1311							'\r' => "\r",
1312							'\t' => "\t",
1313							'\f' => "\f",
1314							'\b' => "\x08", # \b means backspace in character class
1315							'\a' => "\a",
1316							'\e' => "\e",
1317							}->{$1};
1318							}
1319							elsif (/\G \\ ($q_char) /oxmsgc) {
1320	0					0	push @char, $1;
1321							}
1322							elsif (/\G ($q_char) /oxmsgc) {
1323	0					0	push @char, $1;
1324							}
1325							}
1326
1327							# join separated multiple-octet
1328	0					0	@char = join('',@char) =~ /\G (\\-\|$q_char) /oxmsg;
1329
1330							# unescape '-'
1331	0					0	my @i = ();
1332	0					0	for my $i (0 .. $#char) {
1333	0	0				0	if ($char[$i] eq '\-') {
		0
1334	0					0	$char[$i] = '-';
1335							}
1336							elsif ($char[$i] eq '-') {
1337	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1338	0					0	push @i, $i;
1339							}
1340							}
1341							}
1342
1343							# open character list (reverse for splice)
1344	0					0	for my $i (CORE::reverse @i) {
1345	0					0	my @range = ();
1346
1347							# range error
1348	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
1349	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1350							}
1351
1352							# range of multiple-octet code
1353	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
1354	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
1355	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
1356							}
1357							elsif (CORE::length($char[$i+1]) == 2) {
1358	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1359	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
1360							}
1361							elsif (CORE::length($char[$i+1]) == 3) {
1362	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1363	0					0	push @range, chars2();
1364	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1365							}
1366							elsif (CORE::length($char[$i+1]) == 4) {
1367	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1368	0					0	push @range, chars2();
1369	0					0	push @range, chars3();
1370	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1371							}
1372							else {
1373	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1374							}
1375							}
1376							elsif (CORE::length($char[$i-1]) == 2) {
1377	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
1378	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
1379							}
1380							elsif (CORE::length($char[$i+1]) == 3) {
1381	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1382	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1383							}
1384							elsif (CORE::length($char[$i+1]) == 4) {
1385	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1386	0					0	push @range, chars3();
1387	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1388							}
1389							else {
1390	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1391							}
1392							}
1393							elsif (CORE::length($char[$i-1]) == 3) {
1394	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
1395	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
1396							}
1397							elsif (CORE::length($char[$i+1]) == 4) {
1398	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
1399	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1400							}
1401							else {
1402	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1403							}
1404							}
1405							elsif (CORE::length($char[$i-1]) == 4) {
1406	0	0				0	if (CORE::length($char[$i+1]) == 4) {
1407	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
1408							}
1409							else {
1410	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1411							}
1412							}
1413							else {
1414	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1415							}
1416
1417	0					0	splice @char, $i-1, 3, @range;
1418							}
1419
1420	0					0	return @char;
1421							}
1422
1423							#
1424							# Windows-1258 open character class
1425							#
1426							sub _cc {
1427	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
1428	0					0	die __FILE__, ": subroutine cc got no parameter.";
1429							}
1430							elsif (scalar(@_) == 1) {
1431	0					0	return sprintf('\x%02X',$_[0]);
1432							}
1433							elsif (scalar(@_) == 2) {
1434	0	0				0	if ($_[0] > $_[1]) {
		0
		0
1435	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).";
1436							}
1437							elsif ($_[0] == $_[1]) {
1438	0					0	return sprintf('\x%02X',$_[0]);
1439							}
1440							elsif (($_[0]+1) == $_[1]) {
1441	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
1442							}
1443							else {
1444	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
1445							}
1446							}
1447							else {
1448	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).";
	0					0
1449							}
1450							}
1451
1452							#
1453							# Windows-1258 octet range
1454							#
1455							sub _octets {
1456	0			0		0	my $length = shift @_;
1457
1458	0	0				0	if ($length == 1) {
1459	0					0	my($a1) = unpack 'C', $_[0];
1460	0					0	my($z1) = unpack 'C', $_[1];
1461
1462	0	0				0	if ($a1 > $z1) {
1463	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
1464							}
1465
1466	0	0				0	if ($a1 == $z1) {
		0
1467	0					0	return sprintf('\x%02X',$a1);
1468							}
1469							elsif (($a1+1) == $z1) {
1470	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
1471							}
1472							else {
1473	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
1474							}
1475							}
1476							else {
1477	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).";
1478							}
1479							}
1480
1481							#
1482							# Windows-1258 range regexp
1483							#
1484							sub _range_regexp {
1485	0			0		0	my($length,$first,$last) = @_;
1486
1487	0					0	my @range_regexp = ();
1488	0	0				0	if (not exists $range_tr{$length}) {
1489	0					0	return @range_regexp;
1490							}
1491
1492	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
1493	0					0	while (my @range = splice(@ranges,0,$length)) {
1494	0					0	my $min = '';
1495	0					0	my $max = '';
1496	0					0	for (my $i=0; $i < $length; $i++) {
1497	0					0	$min .= pack 'C', $range[$i][0];
1498	0					0	$max .= pack 'C', $range[$i][-1];
1499							}
1500
1501							# min___max
1502							# FIRST_____________LAST
1503							# (nothing)
1504
1505	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
1506							}
1507
1508							# **********
1509							# min_________max
1510							# FIRST_____________LAST
1511							# **********
1512
1513							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
1514	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
1515							}
1516
1517							# **********************
1518							# min________________max
1519							# FIRST_____________LAST
1520							# **********************
1521
1522							elsif (($min eq $first) and ($max eq $last)) {
1523	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1524							}
1525
1526							# *********
1527							# min___max
1528							# FIRST_____________LAST
1529							# *********
1530
1531							elsif (($first le $min) and ($max le $last)) {
1532	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
1533							}
1534
1535							# **********************
1536							# min__________________________max
1537							# FIRST_____________LAST
1538							# **********************
1539
1540							elsif (($min le $first) and ($last le $max)) {
1541	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1542							}
1543
1544							# *********
1545							# min________max
1546							# FIRST_____________LAST
1547							# *********
1548
1549							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
1550	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
1551							}
1552
1553							# min___max
1554							# FIRST_____________LAST
1555							# (nothing)
1556
1557							elsif ($last lt $min) {
1558							}
1559
1560							else {
1561	0					0	die __FILE__, ": subroutine _range_regexp panic.";
1562							}
1563							}
1564
1565	0					0	return @range_regexp;
1566							}
1567
1568							#
1569							# Windows-1258 open character list for qr and not qr
1570							#
1571							sub _charlist {
1572
1573	0			0		0	my $modifier = pop @_;
1574	0					0	my @char = @_;
1575
1576	0	0				0	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
1577
1578							# unescape character
1579	0					0	for (my $i=0; $i <= $#char; $i++) {
1580
1581							# escape - to ...
1582	0	0	0			0	if ($char[$i] eq '-') {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
1583	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1584	0					0	$char[$i] = '...';
1585							}
1586							}
1587
1588							# octal escape sequence
1589							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
1590	0					0	$char[$i] = octchr($1);
1591							}
1592
1593							# hexadecimal escape sequence
1594							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
1595	0					0	$char[$i] = hexchr($1);
1596							}
1597
1598							# \N{CHARNAME} --> N\{CHARNAME}
1599							elsif ($char[$i] =~ /\A \\ (N) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1600	0					0	$char[$i] = $1 . '\\' . $2;
1601							}
1602
1603							# \p{PROPERTY} --> p\{PROPERTY}
1604							elsif ($char[$i] =~ /\A \\ (p) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1605	0					0	$char[$i] = $1 . '\\' . $2;
1606							}
1607
1608							# \P{PROPERTY} --> P\{PROPERTY}
1609							elsif ($char[$i] =~ /\A \\ (P) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1610	0					0	$char[$i] = $1 . '\\' . $2;
1611							}
1612
1613							# \p, \P, \X --> p, P, X
1614							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
1615	0					0	$char[$i] = $1;
1616							}
1617
1618							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
1619	0					0	$char[$i] = CORE::chr oct $1;
1620							}
1621							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
1622	0					0	$char[$i] = CORE::chr hex $1;
1623							}
1624							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
1625	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
1626							}
1627							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
1628	0					0	$char[$i] = {
1629							'\0' => "\0",
1630							'\n' => "\n",
1631							'\r' => "\r",
1632							'\t' => "\t",
1633							'\f' => "\f",
1634							'\b' => "\x08", # \b means backspace in character class
1635							'\a' => "\a",
1636							'\e' => "\e",
1637							'\d' => '[0-9]',
1638
1639							# Vertical tabs are now whitespace
1640							# \s in a regex now matches a vertical tab in all circumstances.
1641							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1642							# \t \n \v \f \r space
1643							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1644							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1645							'\s' => '\s',
1646
1647							'\w' => '[0-9A-Z_a-z]',
1648							'\D' => '${Char::Ewindows1258::eD}',
1649							'\S' => '${Char::Ewindows1258::eS}',
1650							'\W' => '${Char::Ewindows1258::eW}',
1651
1652							'\H' => '${Char::Ewindows1258::eH}',
1653							'\V' => '${Char::Ewindows1258::eV}',
1654							'\h' => '[\x09\x20]',
1655							'\v' => '[\x0A\x0B\x0C\x0D]',
1656							'\R' => '${Char::Ewindows1258::eR}',
1657
1658							}->{$1};
1659							}
1660
1661							# POSIX-style character classes
1662							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
1663	0					0	$char[$i] = {
1664
1665							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
1666							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
1667							'[:^lower:]' => '${Char::Ewindows1258::not_lower_i}',
1668							'[:^upper:]' => '${Char::Ewindows1258::not_upper_i}',
1669
1670							}->{$1};
1671							}
1672							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
1673	0					0	$char[$i] = {
1674
1675							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
1676							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
1677							'[:ascii:]' => '[\x00-\x7F]',
1678							'[:blank:]' => '[\x09\x20]',
1679							'[:cntrl:]' => '[\x00-\x1F\x7F]',
1680							'[:digit:]' => '[\x30-\x39]',
1681							'[:graph:]' => '[\x21-\x7F]',
1682							'[:lower:]' => '[\x61-\x7A]',
1683							'[:print:]' => '[\x20-\x7F]',
1684							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
1685
1686							# P.174 POSIX-Style Character Classes
1687							# in Chapter 5: Pattern Matching
1688							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1689
1690							# P.311 11.2.4 Character Classes and other Special Escapes
1691							# in Chapter 11: perlre: Perl regular expressions
1692							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1693
1694							# P.210 POSIX-Style Character Classes
1695							# in Chapter 5: Pattern Matching
1696							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1697
1698							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
1699
1700							'[:upper:]' => '[\x41-\x5A]',
1701							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
1702							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
1703							'[:^alnum:]' => '${Char::Ewindows1258::not_alnum}',
1704							'[:^alpha:]' => '${Char::Ewindows1258::not_alpha}',
1705							'[:^ascii:]' => '${Char::Ewindows1258::not_ascii}',
1706							'[:^blank:]' => '${Char::Ewindows1258::not_blank}',
1707							'[:^cntrl:]' => '${Char::Ewindows1258::not_cntrl}',
1708							'[:^digit:]' => '${Char::Ewindows1258::not_digit}',
1709							'[:^graph:]' => '${Char::Ewindows1258::not_graph}',
1710							'[:^lower:]' => '${Char::Ewindows1258::not_lower}',
1711							'[:^print:]' => '${Char::Ewindows1258::not_print}',
1712							'[:^punct:]' => '${Char::Ewindows1258::not_punct}',
1713							'[:^space:]' => '${Char::Ewindows1258::not_space}',
1714							'[:^upper:]' => '${Char::Ewindows1258::not_upper}',
1715							'[:^word:]' => '${Char::Ewindows1258::not_word}',
1716							'[:^xdigit:]' => '${Char::Ewindows1258::not_xdigit}',
1717
1718							}->{$1};
1719							}
1720							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
1721	0					0	$char[$i] = $1;
1722							}
1723							}
1724
1725							# open character list
1726	0					0	my @singleoctet = ();
1727	0					0	my @multipleoctet = ();
1728	0					0	for (my $i=0; $i <= $#char; ) {
1729
1730							# escaped -
1731	0	0	0			0	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		0
		0
		0
		0
		0
1732	0					0	$i += 1;
1733	0					0	next;
1734							}
1735
1736							# make range regexp
1737							elsif ($char[$i] eq '...') {
1738
1739							# range error
1740	0	0				0	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		0
1741	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1742							}
1743							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
1744	0	0				0	if ($char[$i-1] gt $char[$i+1]) {
1745	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1746							}
1747							}
1748
1749							# make range regexp per length
1750	0					0	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
1751	0					0	my @regexp = ();
1752
1753							# is first and last
1754	0	0	0			0	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		0	0
		0
		0
1755	0					0	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
1756							}
1757
1758							# is first
1759							elsif ($length == CORE::length($char[$i-1])) {
1760	0					0	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
1761							}
1762
1763							# is inside in first and last
1764							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
1765	0					0	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
1766							}
1767
1768							# is last
1769							elsif ($length == CORE::length($char[$i+1])) {
1770	0					0	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
1771							}
1772
1773							else {
1774	0					0	die __FILE__, ": subroutine make_regexp panic.";
1775							}
1776
1777	0	0				0	if ($length == 1) {
1778	0					0	push @singleoctet, @regexp;
1779							}
1780							else {
1781	0					0	push @multipleoctet, @regexp;
1782							}
1783							}
1784
1785	0					0	$i += 2;
1786							}
1787
1788							# with /i modifier
1789							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1790	0	0				0	if ($modifier =~ /i/oxms) {
1791	0					0	my $uc = Char::Ewindows1258::uc($char[$i]);
1792	0					0	my $fc = Char::Ewindows1258::fc($char[$i]);
1793	0	0				0	if ($uc ne $fc) {
1794	0	0				0	if (CORE::length($fc) == 1) {
1795	0					0	push @singleoctet, $uc, $fc;
1796							}
1797							else {
1798	0					0	push @singleoctet, $uc;
1799	0					0	push @multipleoctet, $fc;
1800							}
1801							}
1802							else {
1803	0					0	push @singleoctet, $char[$i];
1804							}
1805							}
1806							else {
1807	0					0	push @singleoctet, $char[$i];
1808							}
1809	0					0	$i += 1;
1810							}
1811
1812							# single character of single octet code
1813							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
1814	0					0	push @singleoctet, "\t", "\x20";
1815	0					0	$i += 1;
1816							}
1817							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
1818	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
1819	0					0	$i += 1;
1820							}
1821							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
1822	0					0	push @singleoctet, $char[$i];
1823	0					0	$i += 1;
1824							}
1825
1826							# single character of multiple-octet code
1827							else {
1828	0					0	push @multipleoctet, $char[$i];
1829	0					0	$i += 1;
1830							}
1831							}
1832
1833							# quote metachar
1834	0					0	for (@singleoctet) {
1835	0	0				0	if ($_ eq '...') {
		0
		0
		0
		0
1836	0					0	$_ = '-';
1837							}
1838							elsif (/\A \n \z/oxms) {
1839	0					0	$_ = '\n';
1840							}
1841							elsif (/\A \r \z/oxms) {
1842	0					0	$_ = '\r';
1843							}
1844							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
1845	0					0	$_ = sprintf('\x%02X', CORE::ord $1);
1846							}
1847							elsif (/\A [\x00-\xFF] \z/oxms) {
1848	0					0	$_ = quotemeta $_;
1849							}
1850							}
1851
1852							# return character list
1853	0					0	return \@singleoctet, \@multipleoctet;
1854							}
1855
1856							#
1857							# Windows-1258 octal escape sequence
1858							#
1859							sub octchr {
1860	0			0	0	0	my($octdigit) = @_;
1861
1862	0					0	my @binary = ();
1863	0					0	for my $octal (split(//,$octdigit)) {
1864	0					0	push @binary, {
1865							'0' => '000',
1866							'1' => '001',
1867							'2' => '010',
1868							'3' => '011',
1869							'4' => '100',
1870							'5' => '101',
1871							'6' => '110',
1872							'7' => '111',
1873							}->{$octal};
1874							}
1875	0					0	my $binary = join '', @binary;
1876
1877	0					0	my $octchr = {
1878							# 1234567
1879							1 => pack('B*', "0000000$binary"),
1880							2 => pack('B*', "000000$binary"),
1881							3 => pack('B*', "00000$binary"),
1882							4 => pack('B*', "0000$binary"),
1883							5 => pack('B*', "000$binary"),
1884							6 => pack('B*', "00$binary"),
1885							7 => pack('B*', "0$binary"),
1886							0 => pack('B*', "$binary"),
1887
1888							}->{CORE::length($binary) % 8};
1889
1890	0					0	return $octchr;
1891							}
1892
1893							#
1894							# Windows-1258 hexadecimal escape sequence
1895							#
1896							sub hexchr {
1897	0			0	0	0	my($hexdigit) = @_;
1898
1899	0					0	my $hexchr = {
1900							1 => pack('H*', "0$hexdigit"),
1901							0 => pack('H*', "$hexdigit"),
1902
1903							}->{CORE::length($_[0]) % 2};
1904
1905	0					0	return $hexchr;
1906							}
1907
1908							#
1909							# Windows-1258 open character list for qr
1910							#
1911							sub charlist_qr {
1912
1913	0			0	0	0	my $modifier = pop @_;
1914	0					0	my @char = @_;
1915
1916	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1917	0					0	my @singleoctet = @$singleoctet;
1918	0					0	my @multipleoctet = @$multipleoctet;
1919
1920							# return character list
1921	0	0				0	if (scalar(@singleoctet) >= 1) {
1922
1923							# with /i modifier
1924	0	0				0	if ($modifier =~ m/i/oxms) {
1925	0					0	my %singleoctet_ignorecase = ();
1926	0					0	for (@singleoctet) {
1927	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
1928	0					0	for my $ord (hex($1) .. hex($2)) {
1929	0					0	my $char = CORE::chr($ord);
1930	0					0	my $uc = Char::Ewindows1258::uc($char);
1931	0					0	my $fc = Char::Ewindows1258::fc($char);
1932	0	0				0	if ($uc eq $fc) {
1933	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
1934							}
1935							else {
1936	0	0				0	if (CORE::length($fc) == 1) {
1937	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1938	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
1939							}
1940							else {
1941	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1942	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
1943							}
1944							}
1945							}
1946							}
1947	0	0				0	if ($_ ne '') {
1948	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
1949							}
1950							}
1951	0					0	my $i = 0;
1952	0					0	my @singleoctet_ignorecase = ();
1953	0					0	for my $ord (0 .. 255) {
1954	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
1955	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
1956							}
1957							else {
1958	0					0	$i++;
1959							}
1960							}
1961	0					0	@singleoctet = ();
1962	0					0	for my $range (@singleoctet_ignorecase) {
1963	0	0				0	if (ref $range) {
1964	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
1965	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
1966							}
1967							elsif (scalar(@{$range}) == 2) {
1968	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1969							}
1970							else {
1971	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1972							}
1973							}
1974							}
1975							}
1976
1977	0					0	my $not_anchor = '';
1978
1979	0					0	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
1980							}
1981	0	0				0	if (scalar(@multipleoctet) >= 2) {
1982	0					0	return '(?:' . join('\|', @multipleoctet) . ')';
1983							}
1984							else {
1985	0					0	return $multipleoctet[0];
1986							}
1987							}
1988
1989							#
1990							# Windows-1258 open character list for not qr
1991							#
1992							sub charlist_not_qr {
1993
1994	0			0	0	0	my $modifier = pop @_;
1995	0					0	my @char = @_;
1996
1997	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1998	0					0	my @singleoctet = @$singleoctet;
1999	0					0	my @multipleoctet = @$multipleoctet;
2000
2001							# with /i modifier
2002	0	0				0	if ($modifier =~ m/i/oxms) {
2003	0					0	my %singleoctet_ignorecase = ();
2004	0					0	for (@singleoctet) {
2005	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
2006	0					0	for my $ord (hex($1) .. hex($2)) {
2007	0					0	my $char = CORE::chr($ord);
2008	0					0	my $uc = Char::Ewindows1258::uc($char);
2009	0					0	my $fc = Char::Ewindows1258::fc($char);
2010	0	0				0	if ($uc eq $fc) {
2011	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
2012							}
2013							else {
2014	0	0				0	if (CORE::length($fc) == 1) {
2015	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2016	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
2017							}
2018							else {
2019	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2020	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
2021							}
2022							}
2023							}
2024							}
2025	0	0				0	if ($_ ne '') {
2026	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
2027							}
2028							}
2029	0					0	my $i = 0;
2030	0					0	my @singleoctet_ignorecase = ();
2031	0					0	for my $ord (0 .. 255) {
2032	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
2033	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
2034							}
2035							else {
2036	0					0	$i++;
2037							}
2038							}
2039	0					0	@singleoctet = ();
2040	0					0	for my $range (@singleoctet_ignorecase) {
2041	0	0				0	if (ref $range) {
2042	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
2043	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
2044							}
2045							elsif (scalar(@{$range}) == 2) {
2046	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2047							}
2048							else {
2049	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2050							}
2051							}
2052							}
2053							}
2054
2055							# return character list
2056	0	0				0	if (scalar(@multipleoctet) >= 1) {
2057	0	0				0	if (scalar(@singleoctet) >= 1) {
2058
2059							# any character other than multiple-octet and single octet character class
2060	0					0	return '(?!' . join('\|', @multipleoctet) . ')(?:[^' . join('', @singleoctet) . '])';
2061							}
2062							else {
2063
2064							# any character other than multiple-octet character class
2065	0					0	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
2066							}
2067							}
2068							else {
2069	0	0				0	if (scalar(@singleoctet) >= 1) {
2070
2071							# any character other than single octet character class
2072	0					0	return '(?:[^' . join('', @singleoctet) . '])';
2073							}
2074							else {
2075
2076							# any character
2077	0					0	return "(?:$your_char)";
2078							}
2079							}
2080							}
2081
2082							#
2083							# open file in read mode
2084							#
2085							sub _open_r {
2086	177			177		645	my(undef,$file) = @_;
2087	177					1002	$file =~ s#\A (\s) #./$1#oxms;
2088	177		33			19860	return eval(q{open($_[0],'<',$_[1])}) \|\|
2089							open($_[0],"< $file\0");
2090							}
2091
2092							#
2093							# open file in write mode
2094							#
2095							sub _open_w {
2096	0			0		0	my(undef,$file) = @_;
2097	0					0	$file =~ s#\A (\s) #./$1#oxms;
2098	0		0			0	return eval(q{open($_[0],'>',$_[1])}) \|\|
2099							open($_[0],"> $file\0");
2100							}
2101
2102							#
2103							# open file in append mode
2104							#
2105							sub _open_a {
2106	0			0		0	my(undef,$file) = @_;
2107	0					0	$file =~ s#\A (\s) #./$1#oxms;
2108	0		0			0	return eval(q{open($_[0],'>>',$_[1])}) \|\|
2109							open($_[0],">> $file\0");
2110							}
2111
2112							#
2113							# safe system
2114							#
2115							sub _systemx {
2116
2117							# P.707 29.2.33. exec
2118							# in Chapter 29: Functions
2119							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2120							#
2121							# Be aware that in older releases of Perl, exec (and system) did not flush
2122							# your output buffer, so you needed to enable command buffering by setting $\|
2123							# on one or more filehandles to avoid lost output in the case of exec, or
2124							# misordererd output in the case of system. This situation was largely remedied
2125							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
2126
2127							# P.855 exec
2128							# in Chapter 27: Functions
2129							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2130							#
2131							# In very old release of Perl (before v5.6), exec (and system) did not flush
2132							# your output buffer, so you needed to enable command buffering by setting $\|
2133							# on one or more filehandles to avoid lost output with exec or misordered
2134							# output with system.
2135
2136	177			177		819	$\| = 1;
2137
2138							# P.565 23.1.2. Cleaning Up Your Environment
2139							# in Chapter 23: Security
2140							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2141
2142							# P.656 Cleaning Up Your Environment
2143							# in Chapter 20: Security
2144							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2145
2146							# local $ENV{'PATH'} = '.';
2147	177					2385	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
2148
2149							# P.707 29.2.33. exec
2150							# in Chapter 29: Functions
2151							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2152							#
2153							# As we mentioned earlier, exec treats a discrete list of arguments as an
2154							# indication that it should bypass shell processing. However, there is one
2155							# place where you might still get tripped up. The exec call (and system, too)
2156							# will not distinguish between a single scalar argument and an array containing
2157							# only one element.
2158							#
2159							# @args = ("echo surprise"); # just one element in list
2160							# exec @args # still subject to shell escapes
2161							# or die "exec: $!"; # because @args == 1
2162							#
2163							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
2164							# first argument as the pathname, which forces the rest of the arguments to be
2165							# interpreted as a list, even if there is only one of them:
2166							#
2167							# exec { $args[0] } @args # safe even with one-argument list
2168							# or die "can't exec @args: $!";
2169
2170							# P.855 exec
2171							# in Chapter 27: Functions
2172							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2173							#
2174							# As we mentioned earlier, exec treats a discrete list of arguments as a
2175							# directive to bypass shell processing. However, there is one place where
2176							# you might still get tripped up. The exec call (and system, too) cannot
2177							# distinguish between a single scalar argument and an array containing
2178							# only one element.
2179							#
2180							# @args = ("echo surprise"); # just one element in list
2181							# exec @args # still subject to shell escapes
2182							# \|\| die "exec: $!"; # because @args == 1
2183							#
2184							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
2185							# argument as the pathname, which forces the rest of the arguments to be
2186							# interpreted as a list, even if there is only one of them:
2187							#
2188							# exec { $args[0] } @args # safe even with one-argument list
2189							# \|\| die "can't exec @args: $!";
2190
2191	177					527	return CORE::system { $_[0] } @_; # safe even with one-argument list
	177					14902637
2192							}
2193
2194							#
2195							# Windows-1258 order to character (with parameter)
2196							#
2197							sub Char::Ewindows1258::chr(;$) {
2198
2199	0	0		0	0		my $c = @_ ? $_[0] : $_;
2200
2201	0	0					if ($c == 0x00) {
2202	0						return "\x00";
2203							}
2204							else {
2205	0						my @chr = ();
2206	0						while ($c > 0) {
2207	0						unshift @chr, ($c % 0x100);
2208	0						$c = int($c / 0x100);
2209							}
2210	0						return pack 'C*', @chr;
2211							}
2212							}
2213
2214							#
2215							# Windows-1258 order to character (without parameter)
2216							#
2217							sub Char::Ewindows1258::chr_() {
2218
2219	0			0	0		my $c = $_;
2220
2221	0	0					if ($c == 0x00) {
2222	0						return "\x00";
2223							}
2224							else {
2225	0						my @chr = ();
2226	0						while ($c > 0) {
2227	0						unshift @chr, ($c % 0x100);
2228	0						$c = int($c / 0x100);
2229							}
2230	0						return pack 'C*', @chr;
2231							}
2232							}
2233
2234							#
2235							# Windows-1258 path globbing (with parameter)
2236							#
2237							sub Char::Ewindows1258::glob($) {
2238
2239	0	0		0	0		if (wantarray) {
2240	0						my @glob = _DOS_like_glob(@_);
2241	0						for my $glob (@glob) {
2242	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2243							}
2244	0						return @glob;
2245							}
2246							else {
2247	0						my $glob = _DOS_like_glob(@_);
2248	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2249	0						return $glob;
2250							}
2251							}
2252
2253							#
2254							# Windows-1258 path globbing (without parameter)
2255							#
2256							sub Char::Ewindows1258::glob_() {
2257
2258	0	0		0	0		if (wantarray) {
2259	0						my @glob = _DOS_like_glob();
2260	0						for my $glob (@glob) {
2261	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2262							}
2263	0						return @glob;
2264							}
2265							else {
2266	0						my $glob = _DOS_like_glob();
2267	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2268	0						return $glob;
2269							}
2270							}
2271
2272							#
2273							# Windows-1258 path globbing via File::DosGlob 1.10
2274							#
2275							# Often I confuse "_dosglob" and "_doglob".
2276							# So, I renamed "_dosglob" to "_DOS_like_glob".
2277							#
2278							my %iter;
2279							my %entries;
2280							sub _DOS_like_glob {
2281
2282							# context (keyed by second cxix argument provided by core)
2283	0			0			my($expr,$cxix) = @_;
2284
2285							# glob without args defaults to $_
2286	0	0					$expr = $_ if not defined $expr;
2287
2288							# represents the current user's home directory
2289							#
2290							# 7.3. Expanding Tildes in Filenames
2291							# in Chapter 7. File Access
2292							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2293							#
2294							# and File::HomeDir, File::HomeDir::Windows module
2295
2296							# DOS-like system
2297	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2298	0						$expr =~ s{ \A ~ (?= [^/\\] ) }
2299	0						{ my_home_MSWin32() }oxmse;
2300							}
2301
2302							# UNIX-like system
2303							else {
2304	0						$expr =~ s{ \A ~ ( (?:[^/])* ) }
2305	0	0	0				{ $1 ? (eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
2306							}
2307
2308							# assume global context if not provided one
2309	0	0					$cxix = '_G_' if not defined $cxix;
2310	0	0					$iter{$cxix} = 0 if not exists $iter{$cxix};
2311
2312							# if we're just beginning, do it all first
2313	0	0					if ($iter{$cxix} == 0) {
2314	0						$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
2315							}
2316
2317							# chuck it all out, quick or slow
2318	0	0					if (wantarray) {
2319	0						delete $iter{$cxix};
2320	0						return @{delete $entries{$cxix}};
	0
2321							}
2322							else {
2323	0	0					if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0
2324	0						return shift @{$entries{$cxix}};
	0
2325							}
2326							else {
2327							# return undef for EOL
2328	0						delete $iter{$cxix};
2329	0						delete $entries{$cxix};
2330	0						return undef;
2331							}
2332							}
2333							}
2334
2335							#
2336							# Windows-1258 path globbing subroutine
2337							#
2338							sub _do_glob {
2339
2340	0			0			my($cond,@expr) = @_;
2341	0						my @glob = ();
2342	0						my $fix_drive_relative_paths = 0;
2343
2344							OUTER:
2345	0						for my $expr (@expr) {
2346	0	0					next OUTER if not defined $expr;
2347	0	0					next OUTER if $expr eq '';
2348
2349	0						my @matched = ();
2350	0						my @globdir = ();
2351	0						my $head = '.';
2352	0						my $pathsep = '/';
2353	0						my $tail;
2354
2355							# if argument is within quotes strip em and do no globbing
2356	0	0					if ($expr =~ /\A " ((?:$q_char)*) " \z/oxms) {
2357	0						$expr = $1;
2358	0	0					if ($cond eq 'd') {
2359	0	0					if (-d $expr) {
2360	0						push @glob, $expr;
2361							}
2362							}
2363							else {
2364	0	0					if (-e $expr) {
2365	0						push @glob, $expr;
2366							}
2367							}
2368	0						next OUTER;
2369							}
2370
2371							# wildcards with a drive prefix such as h:*.pm must be changed
2372							# to h:./*.pm to expand correctly
2373	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2374	0	0					if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
2375	0						$fix_drive_relative_paths = 1;
2376							}
2377							}
2378
2379	0	0					if (($head, $tail) = _parse_path($expr,$pathsep)) {
2380	0	0					if ($tail eq '') {
2381	0						push @glob, $expr;
2382	0						next OUTER;
2383							}
2384	0	0					if ($head =~ / \A (?:$q_char)? [?] /oxms) {
2385	0	0					if (@globdir = _do_glob('d', $head)) {
2386	0						push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0
2387	0						next OUTER;
2388							}
2389							}
2390	0	0	0				if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
2391	0						$head .= $pathsep;
2392							}
2393	0						$expr = $tail;
2394							}
2395
2396							# If file component has no wildcards, we can avoid opendir
2397	0	0					if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
2398	0	0					if ($head eq '.') {
2399	0						$head = '';
2400							}
2401	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2402	0						$head .= $pathsep;
2403							}
2404	0						$head .= $expr;
2405	0	0					if ($cond eq 'd') {
2406	0	0					if (-d $head) {
2407	0						push @glob, $head;
2408							}
2409							}
2410							else {
2411	0	0					if (-e $head) {
2412	0						push @glob, $head;
2413							}
2414							}
2415	0						next OUTER;
2416							}
2417	0	0					opendir(*DIR, $head) or next OUTER;
2418	0						my @leaf = readdir DIR;
2419	0						closedir DIR;
2420
2421	0	0					if ($head eq '.') {
2422	0						$head = '';
2423							}
2424	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2425	0						$head .= $pathsep;
2426							}
2427
2428	0						my $pattern = '';
2429	0						while ($expr =~ / \G ($q_char) /oxgc) {
2430	0						my $char = $1;
2431
2432							# 6.9. Matching Shell Globs as Regular Expressions
2433							# in Chapter 6. Pattern Matching
2434							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2435							# (and so on)
2436
2437	0	0					if ($char eq '*') {
		0
		0
2438	0						$pattern .= "(?:$your_char)*",
2439							}
2440							elsif ($char eq '?') {
2441	0						$pattern .= "(?:$your_char)?", # DOS style
2442							# $pattern .= "(?:$your_char)", # UNIX style
2443							}
2444							elsif ((my $fc = Char::Ewindows1258::fc($char)) ne $char) {
2445	0						$pattern .= $fc;
2446							}
2447							else {
2448	0						$pattern .= quotemeta $char;
2449							}
2450							}
2451	0			0			my $matchsub = sub { Char::Ewindows1258::fc($_[0]) =~ /\A $pattern \z/xms };
	0
2452
2453							# if ($@) {
2454							# print STDERR "$0: $@\n";
2455							# next OUTER;
2456							# }
2457
2458							INNER:
2459	0						for my $leaf (@leaf) {
2460	0	0	0				if ($leaf eq '.' or $leaf eq '..') {
2461	0						next INNER;
2462							}
2463	0	0	0				if ($cond eq 'd' and not -d "$head$leaf") {
2464	0						next INNER;
2465							}
2466
2467	0	0					if (&$matchsub($leaf)) {
2468	0						push @matched, "$head$leaf";
2469	0						next INNER;
2470							}
2471
2472							# [DOS compatibility special case]
2473							# Failed, add a trailing dot and try again, but only...
2474
2475	0	0	0				if (Char::Ewindows1258::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
2476							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
2477							Char::Ewindows1258::index($pattern,'\\.') != -1 # pattern has a dot.
2478							) {
2479	0	0					if (&$matchsub("$leaf.")) {
2480	0						push @matched, "$head$leaf";
2481	0						next INNER;
2482							}
2483							}
2484							}
2485	0	0					if (@matched) {
2486	0						push @glob, @matched;
2487							}
2488							}
2489	0	0					if ($fix_drive_relative_paths) {
2490	0						for my $glob (@glob) {
2491	0						$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
2492							}
2493							}
2494	0						return @glob;
2495							}
2496
2497							#
2498							# Windows-1258 parse line
2499							#
2500							sub _parse_line {
2501
2502	0			0			my($line) = @_;
2503
2504	0						$line .= ' ';
2505	0						my @piece = ();
2506	0						while ($line =~ /
2507							" ( (?: [^"] )* ) " \s+ \|
2508							( (?: [^"\s] )* ) \s+
2509							/oxmsg
2510							) {
2511	0	0					push @piece, defined($1) ? $1 : $2;
2512							}
2513	0						return @piece;
2514							}
2515
2516							#
2517							# Windows-1258 parse path
2518							#
2519							sub _parse_path {
2520
2521	0			0			my($path,$pathsep) = @_;
2522
2523	0						$path .= '/';
2524	0						my @subpath = ();
2525	0						while ($path =~ /
2526							((?: [^\/\\] )+?) [\/\\]
2527							/oxmsg
2528							) {
2529	0						push @subpath, $1;
2530							}
2531
2532	0						my $tail = pop @subpath;
2533	0						my $head = join $pathsep, @subpath;
2534	0						return $head, $tail;
2535							}
2536
2537							#
2538							# via File::HomeDir::Windows 1.00
2539							#
2540							sub my_home_MSWin32 {
2541
2542							# A lot of unix people and unix-derived tools rely on
2543							# the ability to overload HOME. We will support it too
2544							# so that they can replace raw HOME calls with File::HomeDir.
2545	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
2546	0						return $ENV{'HOME'};
2547							}
2548
2549							# Do we have a user profile?
2550							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
2551	0						return $ENV{'USERPROFILE'};
2552							}
2553
2554							# Some Windows use something like $ENV{'HOME'}
2555							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
2556	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
2557							}
2558
2559	0						return undef;
2560							}
2561
2562							#
2563							# via File::HomeDir::Unix 1.00
2564							#
2565							sub my_home {
2566	0			0	0		my $home;
2567
2568	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
2569	0						$home = $ENV{'HOME'};
2570							}
2571
2572							# This is from the original code, but I'm guessing
2573							# it means "login directory" and exists on some Unixes.
2574							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
2575	0						$home = $ENV{'LOGDIR'};
2576							}
2577
2578							### More-desperate methods
2579
2580							# Light desperation on any (Unixish) platform
2581							else {
2582	0						$home = eval q{ (getpwuid($<))[7] };
2583							}
2584
2585							# On Unix in general, a non-existant home means "no home"
2586							# For example, "nobody"-like users might use /nonexistant
2587	0	0	0				if (defined $home and ! -d($home)) {
2588	0						$home = undef;
2589							}
2590	0						return $home;
2591							}
2592
2593							#
2594							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
2595							#
2596							sub Char::Ewindows1258::PREMATCH {
2597	0			0	0		return $`;
2598							}
2599
2600							#
2601							# ${^MATCH}, $MATCH, $& the string that matched
2602							#
2603							sub Char::Ewindows1258::MATCH {
2604	0			0	0		return $&;
2605							}
2606
2607							#
2608							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
2609							#
2610							sub Char::Ewindows1258::POSTMATCH {
2611	0			0	0		return $';
2612							}
2613
2614							#
2615							# Windows-1258 character to order (with parameter)
2616							#
2617							sub Char::Windows1258::ord(;$) {
2618
2619	0	0		0	1		local $_ = shift if @_;
2620
2621	0	0					if (/\A ($q_char) /oxms) {
2622	0						my @ord = unpack 'C*', $1;
2623	0						my $ord = 0;
2624	0						while (my $o = shift @ord) {
2625	0						$ord = $ord * 0x100 + $o;
2626							}
2627	0						return $ord;
2628							}
2629							else {
2630	0						return CORE::ord $_;
2631							}
2632							}
2633
2634							#
2635							# Windows-1258 character to order (without parameter)
2636							#
2637							sub Char::Windows1258::ord_() {
2638
2639	0	0		0	0		if (/\A ($q_char) /oxms) {
2640	0						my @ord = unpack 'C*', $1;
2641	0						my $ord = 0;
2642	0						while (my $o = shift @ord) {
2643	0						$ord = $ord * 0x100 + $o;
2644							}
2645	0						return $ord;
2646							}
2647							else {
2648	0						return CORE::ord $_;
2649							}
2650							}
2651
2652							#
2653							# Windows-1258 reverse
2654							#
2655							sub Char::Windows1258::reverse(@) {
2656
2657	0	0		0	0		if (wantarray) {
2658	0						return CORE::reverse @_;
2659							}
2660							else {
2661
2662							# One of us once cornered Larry in an elevator and asked him what
2663							# problem he was solving with this, but he looked as far off into
2664							# the distance as he could in an elevator and said, "It seemed like
2665							# a good idea at the time."
2666
2667	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
2668							}
2669							}
2670
2671							#
2672							# Windows-1258 getc (with parameter, without parameter)
2673							#
2674							sub Char::Windows1258::getc(;*@) {
2675
2676	0			0	0		my($package) = caller;
2677	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
2678	0	0	0				croak 'Too many arguments for Char::Windows1258::getc' if @_ and not wantarray;
2679
2680	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
2681	0						my $getc = '';
2682	0						for my $length ($length[0] .. $length[-1]) {
2683	0						$getc .= CORE::getc($fh);
2684	0	0					if (exists $range_tr{CORE::length($getc)}) {
2685	0	0					if ($getc =~ /\A ${Char::Ewindows1258::dot_s} \z/oxms) {
2686	0	0					return wantarray ? ($getc,@_) : $getc;
2687							}
2688							}
2689							}
2690	0	0					return wantarray ? ($getc,@_) : $getc;
2691							}
2692
2693							#
2694							# Windows-1258 length by character
2695							#
2696							sub Char::Windows1258::length(;$) {
2697
2698	0	0		0	1		local $_ = shift if @_;
2699
2700	0						local @_ = /\G ($q_char) /oxmsg;
2701	0						return scalar @_;
2702							}
2703
2704							#
2705							# Windows-1258 substr by character
2706							#
2707							BEGIN {
2708
2709							# P.232 The lvalue Attribute
2710							# in Chapter 6: Subroutines
2711							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2712
2713							# P.336 The lvalue Attribute
2714							# in Chapter 7: Subroutines
2715							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2716
2717							# P.144 8.4 Lvalue subroutines
2718							# in Chapter 8: perlsub: Perl subroutines
2719							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2720
2721	177	50	0	177	1	317651	eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
2722							# vv----------------*******
2723							sub Char::Windows1258::substr($$;$$) %s {
2724
2725							my @char = $_[0] =~ /\G ($q_char) /oxmsg;
2726
2727							# If the substring is beyond either end of the string, substr() returns the undefined
2728							# value and produces a warning. When used as an lvalue, specifying a substring that
2729							# is entirely outside the string raises an exception.
2730							# http://perldoc.perl.org/functions/substr.html
2731
2732							# A return with no argument returns the scalar value undef in scalar context,
2733							# an empty list () in list context, and (naturally) nothing at all in void
2734							# context.
2735
2736							my $offset = $_[1];
2737							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
2738							return;
2739							}
2740
2741							# substr($string,$offset,$length,$replacement)
2742							if (@_ == 4) {
2743							my(undef,undef,$length,$replacement) = @_;
2744							my $substr = join '', splice(@char, $offset, $length, $replacement);
2745							$_[0] = join '', @char;
2746
2747							# return $substr; this doesn't work, don't say "return"
2748							$substr;
2749							}
2750
2751							# substr($string,$offset,$length)
2752							elsif (@_ == 3) {
2753							my(undef,undef,$length) = @_;
2754							my $octet_offset = 0;
2755							my $octet_length = 0;
2756							if ($offset == 0) {
2757							$octet_offset = 0;
2758							}
2759							elsif ($offset > 0) {
2760							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2761							}
2762							else {
2763							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2764							}
2765							if ($length == 0) {
2766							$octet_length = 0;
2767							}
2768							elsif ($length > 0) {
2769							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
2770							}
2771							else {
2772							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
2773							}
2774							CORE::substr($_[0], $octet_offset, $octet_length);
2775							}
2776
2777							# substr($string,$offset)
2778							else {
2779							my $octet_offset = 0;
2780							if ($offset == 0) {
2781							$octet_offset = 0;
2782							}
2783							elsif ($offset > 0) {
2784							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2785							}
2786							else {
2787							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2788							}
2789							CORE::substr($_[0], $octet_offset);
2790							}
2791							}
2792							END
2793							}
2794
2795							#
2796							# Windows-1258 index by character
2797							#
2798							sub Char::Windows1258::index($$;$) {
2799
2800	0			0	1		my $index;
2801	0	0					if (@_ == 3) {
2802	0						$index = Char::Ewindows1258::index($_[0], $_[1], CORE::length(Char::Windows1258::substr($_[0], 0, $_[2])));
2803							}
2804							else {
2805	0						$index = Char::Ewindows1258::index($_[0], $_[1]);
2806							}
2807
2808	0	0					if ($index == -1) {
2809	0						return -1;
2810							}
2811							else {
2812	0						return Char::Windows1258::length(CORE::substr $_[0], 0, $index);
2813							}
2814							}
2815
2816							#
2817							# Windows-1258 rindex by character
2818							#
2819							sub Char::Windows1258::rindex($$;$) {
2820
2821	0			0	1		my $rindex;
2822	0	0					if (@_ == 3) {
2823	0						$rindex = Char::Ewindows1258::rindex($_[0], $_[1], CORE::length(Char::Windows1258::substr($_[0], 0, $_[2])));
2824							}
2825							else {
2826	0						$rindex = Char::Ewindows1258::rindex($_[0], $_[1]);
2827							}
2828
2829	0	0					if ($rindex == -1) {
2830	0						return -1;
2831							}
2832							else {
2833	0						return Char::Windows1258::length(CORE::substr $_[0], 0, $rindex);
2834							}
2835							}
2836
2837							#
2838							# instead of Carp::carp
2839							#
2840							sub carp {
2841	0			0	0		my($package,$filename,$line) = caller(1);
2842	0						print STDERR "@_ at $filename line $line.\n";
2843							}
2844
2845							#
2846							# instead of Carp::croak
2847							#
2848							sub croak {
2849	0			0	0		my($package,$filename,$line) = caller(1);
2850	0						print STDERR "@_ at $filename line $line.\n";
2851	0						die "\n";
2852							}
2853
2854							#
2855							# instead of Carp::cluck
2856							#
2857							sub cluck {
2858	0			0	0		my $i = 0;
2859	0						my @cluck = ();
2860	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2861	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
2862	0						$i++;
2863							}
2864	0						print STDERR CORE::reverse @cluck;
2865	0						print STDERR "\n";
2866	0						carp @_;
2867							}
2868
2869							#
2870							# instead of Carp::confess
2871							#
2872							sub confess {
2873	0			0	0		my $i = 0;
2874	0						my @confess = ();
2875	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2876	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
2877	0						$i++;
2878							}
2879	0						print STDERR CORE::reverse @confess;
2880	0						print STDERR "\n";
2881	0						croak @_;
2882							}
2883
2884							1;
2885
2886							__END__