File Coverage

Char/Elatin1.pm

Criterion	Covered	Total	%
statement	51	954	5.3
branch	4	562	0.7
condition	1	180	0.5
subroutine	20	85	23.5
pod	7	50	14.0
total	83	1831	4.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# This file is * FOR CPAN USE ONLY *.
3							#
4							package Char::Elatin1;
5							######################################################################
6							#
7							# Char::Elatin1 - Run-time routines for Char/Latin1.pm
8							#
9							# http://search.cpan.org/dist/Char-Latin1/
10							#
11							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 INABA Hitoshi
12							######################################################################
13
14	177			177		3702	use 5.00503; # Galapagos Consensus 1998 for primetools
	177					590
	177					8983
15							# use 5.008001; # Lancaster Consensus 2013 for toolchains
16
17							# 12.3. Delaying use Until Runtime
18							# in Chapter 12. Packages, Libraries, and Modules
19							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
20							# (and so on)
21
22	177			177		12399	BEGIN { eval q{ use vars qw($VERSION) } }
	177			177		1001
	177					361
	177					31112
23							$VERSION = sprintf '%d.%02d', q$Revision: 1.01 $ =~ /(\d+)/xmsg;
24
25							BEGIN {
26	177	50		177		1099	if ($^X =~ / jperl /oxmsi) {
27	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)";
28							}
29	177					292	if (CORE::ord('A') == 193) {
30							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).";
31							}
32	177					30923	if (CORE::ord('A') != 0x41) {
33							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).";
34							}
35							}
36
37							BEGIN {
38
39							# instead of utf8.pm
40	177			177		17710	eval q{
	177			177		1043
	177			55		324
	177					29510
	55					8673
	70					12690
	59					10681
	59					12290
	59					9816
	52					8572
41							no warnings qw(redefine);
42							*utf8::upgrade = sub { CORE::length $_[0] };
43							*utf8::downgrade = sub { 1 };
44							*utf8::encode = sub { };
45							*utf8::decode = sub { 1 };
46							*utf8::is_utf8 = sub { };
47							*utf8::valid = sub { 1 };
48							};
49	177	50				128900	if ($@) {
50	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
51	0					0	*utf8::downgrade = sub { 1 };
	0					0
52	0					0	*utf8::encode = sub { };
	0					0
53	0					0	*utf8::decode = sub { 1 };
	0					0
54	0					0	*utf8::is_utf8 = sub { };
	0					0
55	0					0	*utf8::valid = sub { 1 };
	0					0
56							}
57							}
58
59							# instead of Symbol.pm
60							BEGIN {
61	177			177		474	my $genpkg = "Symbol::";
62	177					8829	my $genseq = 0;
63
64							sub gensym () {
65	0			0	0	0	my $name = "GEN" . $genseq++;
66
67							# here, no strict qw(refs); if strict.pm exists
68
69	0					0	my $ref = \*{$genpkg . $name};
	0					0
70	0					0	delete $$genpkg{$name};
71	0					0	return $ref;
72							}
73
74							sub qualify ($;$) {
75	0			0	0	0	my ($name) = @_;
76	0	0	0			0	if (!ref($name) && (Char::Elatin1::index($name, '::') == -1) && (Char::Elatin1::index($name, "'") == -1)) {
			0
77	0					0	my $pkg;
78	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
79
80							# Global names: special character, "^xyz", or other.
81	0	0	0			0	if ($name =~ /^(([^a-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
82							# RGS 2001-11-05 : translate leading ^X to control-char
83	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
84	0					0	$pkg = "main";
85							}
86							else {
87	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
88							}
89	0					0	$name = $pkg . "::" . $name;
90							}
91	0					0	return $name;
92							}
93
94							sub qualify_to_ref ($;$) {
95
96							# here, no strict qw(refs); if strict.pm exists
97
98	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
99							}
100							}
101
102							# Column: local $@
103							# in Chapter 9. Osaete okitai Perl no kiso
104							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
105							# (and so on)
106
107							# use strict; if strict.pm exists
108							BEGIN {
109	177	50		177		556	if (eval { local $@; CORE::require strict }) {
	177					455
	177					3127
110	177					25055	strict::->import;
111							}
112							}
113
114							# P.714 29.2.39. flock
115							# in Chapter 29: Functions
116							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
117
118							# P.863 flock
119							# in Chapter 27: Functions
120							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
121
122							sub LOCK_SH() {1}
123							sub LOCK_EX() {2}
124							sub LOCK_UN() {8}
125							sub LOCK_NB() {4}
126
127							# instead of Carp.pm
128							sub carp;
129							sub croak;
130							sub cluck;
131							sub confess;
132
133							my $your_char = q{[\x00-\xFF]};
134
135							# regexp of character
136	177			177		11794	BEGIN { eval q{ use vars qw($q_char) } }
	177			177		6747
	177					345
	177					13468
137							$q_char = qr/$your_char/oxms;
138
139							#
140							# Latin-1 character range per length
141							#
142							my %range_tr = ();
143
144							#
145							# alias of encoding name
146							#
147	177			177		10170	BEGIN { eval q{ use vars qw($encoding_alias) } }
	177			177		1078
	177					371
	177					378234
148
149							#
150							# Latin-1 case conversion
151							#
152							my %lc = ();
153							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
154							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
155							my %uc = ();
156							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
157							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
158							my %fc = ();
159							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
160							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
161
162							if (0) {
163							}
164
165							elsif (__PACKAGE__ =~ / \b Elatin1 \z/oxms) {
166							%range_tr = (
167							1 => [ [0x00..0xFF],
168							],
169							);
170							$encoding_alias = qr/ \b (?: iso[-_ ]?8859-1 \| iec[- ]?8859-1 \| latin-?1 ) \b /oxmsi;
171
172							%lc = (%lc,
173							"\xC0" => "\xE0", # LATIN LETTER A WITH GRAVE
174							"\xC1" => "\xE1", # LATIN LETTER A WITH ACUTE
175							"\xC2" => "\xE2", # LATIN LETTER A WITH CIRCUMFLEX
176							"\xC3" => "\xE3", # LATIN LETTER A WITH TILDE
177							"\xC4" => "\xE4", # LATIN LETTER A WITH DIAERESIS
178							"\xC5" => "\xE5", # LATIN LETTER A WITH RING ABOVE
179							"\xC6" => "\xE6", # LATIN LETTER AE
180							"\xC7" => "\xE7", # LATIN LETTER C WITH CEDILLA
181							"\xC8" => "\xE8", # LATIN LETTER E WITH GRAVE
182							"\xC9" => "\xE9", # LATIN LETTER E WITH ACUTE
183							"\xCA" => "\xEA", # LATIN LETTER E WITH CIRCUMFLEX
184							"\xCB" => "\xEB", # LATIN LETTER E WITH DIAERESIS
185							"\xCC" => "\xEC", # LATIN LETTER I WITH GRAVE
186							"\xCD" => "\xED", # LATIN LETTER I WITH ACUTE
187							"\xCE" => "\xEE", # LATIN LETTER I WITH CIRCUMFLEX
188							"\xCF" => "\xEF", # LATIN LETTER I WITH DIAERESIS
189							"\xD0" => "\xF0", # LATIN LETTER ETH (Icelandic)
190							"\xD1" => "\xF1", # LATIN LETTER N WITH TILDE
191							"\xD2" => "\xF2", # LATIN LETTER O WITH GRAVE
192							"\xD3" => "\xF3", # LATIN LETTER O WITH ACUTE
193							"\xD4" => "\xF4", # LATIN LETTER O WITH CIRCUMFLEX
194							"\xD5" => "\xF5", # LATIN LETTER O WITH TILDE
195							"\xD6" => "\xF6", # LATIN LETTER O WITH DIAERESIS
196							"\xD8" => "\xF8", # LATIN LETTER O WITH STROKE
197							"\xD9" => "\xF9", # LATIN LETTER U WITH GRAVE
198							"\xDA" => "\xFA", # LATIN LETTER U WITH ACUTE
199							"\xDB" => "\xFB", # LATIN LETTER U WITH CIRCUMFLEX
200							"\xDC" => "\xFC", # LATIN LETTER U WITH DIAERESIS
201							"\xDD" => "\xFD", # LATIN LETTER Y WITH ACUTE
202							"\xDE" => "\xFE", # LATIN LETTER THORN (Icelandic)
203							);
204
205							%uc = (%uc,
206							"\xE0" => "\xC0", # LATIN LETTER A WITH GRAVE
207							"\xE1" => "\xC1", # LATIN LETTER A WITH ACUTE
208							"\xE2" => "\xC2", # LATIN LETTER A WITH CIRCUMFLEX
209							"\xE3" => "\xC3", # LATIN LETTER A WITH TILDE
210							"\xE4" => "\xC4", # LATIN LETTER A WITH DIAERESIS
211							"\xE5" => "\xC5", # LATIN LETTER A WITH RING ABOVE
212							"\xE6" => "\xC6", # LATIN LETTER AE
213							"\xE7" => "\xC7", # LATIN LETTER C WITH CEDILLA
214							"\xE8" => "\xC8", # LATIN LETTER E WITH GRAVE
215							"\xE9" => "\xC9", # LATIN LETTER E WITH ACUTE
216							"\xEA" => "\xCA", # LATIN LETTER E WITH CIRCUMFLEX
217							"\xEB" => "\xCB", # LATIN LETTER E WITH DIAERESIS
218							"\xEC" => "\xCC", # LATIN LETTER I WITH GRAVE
219							"\xED" => "\xCD", # LATIN LETTER I WITH ACUTE
220							"\xEE" => "\xCE", # LATIN LETTER I WITH CIRCUMFLEX
221							"\xEF" => "\xCF", # LATIN LETTER I WITH DIAERESIS
222							"\xF0" => "\xD0", # LATIN LETTER ETH (Icelandic)
223							"\xF1" => "\xD1", # LATIN LETTER N WITH TILDE
224							"\xF2" => "\xD2", # LATIN LETTER O WITH GRAVE
225							"\xF3" => "\xD3", # LATIN LETTER O WITH ACUTE
226							"\xF4" => "\xD4", # LATIN LETTER O WITH CIRCUMFLEX
227							"\xF5" => "\xD5", # LATIN LETTER O WITH TILDE
228							"\xF6" => "\xD6", # LATIN LETTER O WITH DIAERESIS
229							"\xF8" => "\xD8", # LATIN LETTER O WITH STROKE
230							"\xF9" => "\xD9", # LATIN LETTER U WITH GRAVE
231							"\xFA" => "\xDA", # LATIN LETTER U WITH ACUTE
232							"\xFB" => "\xDB", # LATIN LETTER U WITH CIRCUMFLEX
233							"\xFC" => "\xDC", # LATIN LETTER U WITH DIAERESIS
234							"\xFD" => "\xDD", # LATIN LETTER Y WITH ACUTE
235							"\xFE" => "\xDE", # LATIN LETTER THORN (Icelandic)
236							);
237
238							%fc = (%fc,
239							"\xC0" => "\xE0", # LATIN CAPITAL LETTER A WITH GRAVE --> LATIN SMALL LETTER A WITH GRAVE
240							"\xC1" => "\xE1", # LATIN CAPITAL LETTER A WITH ACUTE --> LATIN SMALL LETTER A WITH ACUTE
241							"\xC2" => "\xE2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX --> LATIN SMALL LETTER A WITH CIRCUMFLEX
242							"\xC3" => "\xE3", # LATIN CAPITAL LETTER A WITH TILDE --> LATIN SMALL LETTER A WITH TILDE
243							"\xC4" => "\xE4", # LATIN CAPITAL LETTER A WITH DIAERESIS --> LATIN SMALL LETTER A WITH DIAERESIS
244							"\xC5" => "\xE5", # LATIN CAPITAL LETTER A WITH RING ABOVE --> LATIN SMALL LETTER A WITH RING ABOVE
245							"\xC6" => "\xE6", # LATIN CAPITAL LETTER AE --> LATIN SMALL LETTER AE
246							"\xC7" => "\xE7", # LATIN CAPITAL LETTER C WITH CEDILLA --> LATIN SMALL LETTER C WITH CEDILLA
247							"\xC8" => "\xE8", # LATIN CAPITAL LETTER E WITH GRAVE --> LATIN SMALL LETTER E WITH GRAVE
248							"\xC9" => "\xE9", # LATIN CAPITAL LETTER E WITH ACUTE --> LATIN SMALL LETTER E WITH ACUTE
249							"\xCA" => "\xEA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX --> LATIN SMALL LETTER E WITH CIRCUMFLEX
250							"\xCB" => "\xEB", # LATIN CAPITAL LETTER E WITH DIAERESIS --> LATIN SMALL LETTER E WITH DIAERESIS
251							"\xCC" => "\xEC", # LATIN CAPITAL LETTER I WITH GRAVE --> LATIN SMALL LETTER I WITH GRAVE
252							"\xCD" => "\xED", # LATIN CAPITAL LETTER I WITH ACUTE --> LATIN SMALL LETTER I WITH ACUTE
253							"\xCE" => "\xEE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX --> LATIN SMALL LETTER I WITH CIRCUMFLEX
254							"\xCF" => "\xEF", # LATIN CAPITAL LETTER I WITH DIAERESIS --> LATIN SMALL LETTER I WITH DIAERESIS
255							"\xD0" => "\xF0", # LATIN CAPITAL LETTER ETH --> LATIN SMALL LETTER ETH
256							"\xD1" => "\xF1", # LATIN CAPITAL LETTER N WITH TILDE --> LATIN SMALL LETTER N WITH TILDE
257							"\xD2" => "\xF2", # LATIN CAPITAL LETTER O WITH GRAVE --> LATIN SMALL LETTER O WITH GRAVE
258							"\xD3" => "\xF3", # LATIN CAPITAL LETTER O WITH ACUTE --> LATIN SMALL LETTER O WITH ACUTE
259							"\xD4" => "\xF4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX --> LATIN SMALL LETTER O WITH CIRCUMFLEX
260							"\xD5" => "\xF5", # LATIN CAPITAL LETTER O WITH TILDE --> LATIN SMALL LETTER O WITH TILDE
261							"\xD6" => "\xF6", # LATIN CAPITAL LETTER O WITH DIAERESIS --> LATIN SMALL LETTER O WITH DIAERESIS
262							"\xD8" => "\xF8", # LATIN CAPITAL LETTER O WITH STROKE --> LATIN SMALL LETTER O WITH STROKE
263							"\xD9" => "\xF9", # LATIN CAPITAL LETTER U WITH GRAVE --> LATIN SMALL LETTER U WITH GRAVE
264							"\xDA" => "\xFA", # LATIN CAPITAL LETTER U WITH ACUTE --> LATIN SMALL LETTER U WITH ACUTE
265							"\xDB" => "\xFB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX --> LATIN SMALL LETTER U WITH CIRCUMFLEX
266							"\xDC" => "\xFC", # LATIN CAPITAL LETTER U WITH DIAERESIS --> LATIN SMALL LETTER U WITH DIAERESIS
267							"\xDD" => "\xFD", # LATIN CAPITAL LETTER Y WITH ACUTE --> LATIN SMALL LETTER Y WITH ACUTE
268							"\xDE" => "\xFE", # LATIN CAPITAL LETTER THORN --> LATIN SMALL LETTER THORN
269							"\xDF" => "\x73\x73", # LATIN SMALL LETTER SHARP S --> LATIN SMALL LETTER S, LATIN SMALL LETTER S
270							);
271							}
272
273							else {
274							croak "Don't know my package name '@{[__PACKAGE__]}'";
275							}
276
277							#
278							# @ARGV wildcard globbing
279							#
280							sub import {
281
282	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
283	0					0	my @argv = ();
284	0					0	for (@ARGV) {
285
286							# has space
287	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
288	0	0				0	if (my @glob = Char::Elatin1::glob(qq{"$_"})) {
289	0					0	push @argv, @glob;
290							}
291							else {
292	0					0	push @argv, $_;
293							}
294							}
295
296							# has wildcard metachar
297							elsif (/\A (?:$q_char)? [?] /oxms) {
298	0	0				0	if (my @glob = Char::Elatin1::glob($_)) {
299	0					0	push @argv, @glob;
300							}
301							else {
302	0					0	push @argv, $_;
303							}
304							}
305
306							# no wildcard globbing
307							else {
308	0					0	push @argv, $_;
309							}
310							}
311	0					0	@ARGV = @argv;
312							}
313							}
314
315							# P.230 Care with Prototypes
316							# in Chapter 6: Subroutines
317							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
318							#
319							# If you aren't careful, you can get yourself into trouble with prototypes.
320							# But if you are careful, you can do a lot of neat things with them. This is
321							# all very powerful, of course, and should only be used in moderation to make
322							# the world a better place.
323
324							# P.332 Care with Prototypes
325							# in Chapter 7: Subroutines
326							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
327							#
328							# If you aren't careful, you can get yourself into trouble with prototypes.
329							# But if you are careful, you can do a lot of neat things with them. This is
330							# all very powerful, of course, and should only be used in moderation to make
331							# the world a better place.
332
333							#
334							# Prototypes of subroutines
335							#
336	0			0		0	sub unimport {}
337							sub Char::Elatin1::split(;$$$);
338							sub Char::Elatin1::tr($$$$;$);
339							sub Char::Elatin1::chop(@);
340							sub Char::Elatin1::index($$;$);
341							sub Char::Elatin1::rindex($$;$);
342							sub Char::Elatin1::lcfirst(@);
343							sub Char::Elatin1::lcfirst_();
344							sub Char::Elatin1::lc(@);
345							sub Char::Elatin1::lc_();
346							sub Char::Elatin1::ucfirst(@);
347							sub Char::Elatin1::ucfirst_();
348							sub Char::Elatin1::uc(@);
349							sub Char::Elatin1::uc_();
350							sub Char::Elatin1::fc(@);
351							sub Char::Elatin1::fc_();
352							sub Char::Elatin1::ignorecase;
353							sub Char::Elatin1::classic_character_class;
354							sub Char::Elatin1::capture;
355							sub Char::Elatin1::chr(;$);
356							sub Char::Elatin1::chr_();
357							sub Char::Elatin1::glob($);
358							sub Char::Elatin1::glob_();
359
360							sub Char::Latin1::ord(;$);
361							sub Char::Latin1::ord_();
362							sub Char::Latin1::reverse(@);
363							sub Char::Latin1::getc(;*@);
364							sub Char::Latin1::length(;$);
365							sub Char::Latin1::substr($$;$$);
366							sub Char::Latin1::index($$;$);
367							sub Char::Latin1::rindex($$;$);
368
369							#
370							# Regexp work
371							#
372	177			177		15425	BEGIN { eval q{ use vars qw(
	177			177		1427
	177					364
	177					85704
373							$Char::Latin1::re_a
374							$Char::Latin1::re_t
375							$Char::Latin1::re_n
376							$Char::Latin1::re_r
377							) } }
378
379							#
380							# Character class
381							#
382	177			177		19833	BEGIN { eval q{ use vars qw(
	177			177		1395
	177					326
	177					3348103
383							$dot
384							$dot_s
385							$eD
386							$eS
387							$eW
388							$eH
389							$eV
390							$eR
391							$eN
392							$not_alnum
393							$not_alpha
394							$not_ascii
395							$not_blank
396							$not_cntrl
397							$not_digit
398							$not_graph
399							$not_lower
400							$not_lower_i
401							$not_print
402							$not_punct
403							$not_space
404							$not_upper
405							$not_upper_i
406							$not_word
407							$not_xdigit
408							$eb
409							$eB
410							) } }
411
412							${Char::Elatin1::dot} = qr{(?:[^\x0A])};
413							${Char::Elatin1::dot_s} = qr{(?:[\x00-\xFF])};
414							${Char::Elatin1::eD} = qr{(?:[^0-9])};
415
416							# Vertical tabs are now whitespace
417							# \s in a regex now matches a vertical tab in all circumstances.
418							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
419							# ${Char::Elatin1::eS} = qr{(?:[^\x09\x0A \x0C\x0D\x20])};
420							# ${Char::Elatin1::eS} = qr{(?:[^\x09\x0A\x0B\x0C\x0D\x20])};
421							${Char::Elatin1::eS} = qr{(?:[^\s])};
422
423							${Char::Elatin1::eW} = qr{(?:[^0-9A-Z_a-z])};
424							${Char::Elatin1::eH} = qr{(?:[^\x09\x20])};
425							${Char::Elatin1::eV} = qr{(?:[^\x0A\x0B\x0C\x0D])};
426							${Char::Elatin1::eR} = qr{(?:\x0D\x0A\|[\x0A\x0D])};
427							${Char::Elatin1::eN} = qr{(?:[^\x0A])};
428							${Char::Elatin1::not_alnum} = qr{(?:[^\x30-\x39\x41-\x5A\x61-\x7A])};
429							${Char::Elatin1::not_alpha} = qr{(?:[^\x41-\x5A\x61-\x7A])};
430							${Char::Elatin1::not_ascii} = qr{(?:[^\x00-\x7F])};
431							${Char::Elatin1::not_blank} = qr{(?:[^\x09\x20])};
432							${Char::Elatin1::not_cntrl} = qr{(?:[^\x00-\x1F\x7F])};
433							${Char::Elatin1::not_digit} = qr{(?:[^\x30-\x39])};
434							${Char::Elatin1::not_graph} = qr{(?:[^\x21-\x7F])};
435							${Char::Elatin1::not_lower} = qr{(?:[^\x61-\x7A])};
436							${Char::Elatin1::not_lower_i} = qr{(?:[\x00-\xFF])};
437							${Char::Elatin1::not_print} = qr{(?:[^\x20-\x7F])};
438							${Char::Elatin1::not_punct} = qr{(?:[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
439							${Char::Elatin1::not_space} = qr{(?:[^\s\x0B])};
440							${Char::Elatin1::not_upper} = qr{(?:[^\x41-\x5A])};
441							${Char::Elatin1::not_upper_i} = qr{(?:[\x00-\xFF])};
442							${Char::Elatin1::not_word} = qr{(?:[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
443							${Char::Elatin1::not_xdigit} = qr{(?:[^\x30-\x39\x41-\x46\x61-\x66])};
444							${Char::Elatin1::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
445							${Char::Elatin1::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
446
447							# avoid: Name "Char::Elatin1::foo" used only once: possible typo at here.
448							${Char::Elatin1::dot} = ${Char::Elatin1::dot};
449							${Char::Elatin1::dot_s} = ${Char::Elatin1::dot_s};
450							${Char::Elatin1::eD} = ${Char::Elatin1::eD};
451							${Char::Elatin1::eS} = ${Char::Elatin1::eS};
452							${Char::Elatin1::eW} = ${Char::Elatin1::eW};
453							${Char::Elatin1::eH} = ${Char::Elatin1::eH};
454							${Char::Elatin1::eV} = ${Char::Elatin1::eV};
455							${Char::Elatin1::eR} = ${Char::Elatin1::eR};
456							${Char::Elatin1::eN} = ${Char::Elatin1::eN};
457							${Char::Elatin1::not_alnum} = ${Char::Elatin1::not_alnum};
458							${Char::Elatin1::not_alpha} = ${Char::Elatin1::not_alpha};
459							${Char::Elatin1::not_ascii} = ${Char::Elatin1::not_ascii};
460							${Char::Elatin1::not_blank} = ${Char::Elatin1::not_blank};
461							${Char::Elatin1::not_cntrl} = ${Char::Elatin1::not_cntrl};
462							${Char::Elatin1::not_digit} = ${Char::Elatin1::not_digit};
463							${Char::Elatin1::not_graph} = ${Char::Elatin1::not_graph};
464							${Char::Elatin1::not_lower} = ${Char::Elatin1::not_lower};
465							${Char::Elatin1::not_lower_i} = ${Char::Elatin1::not_lower_i};
466							${Char::Elatin1::not_print} = ${Char::Elatin1::not_print};
467							${Char::Elatin1::not_punct} = ${Char::Elatin1::not_punct};
468							${Char::Elatin1::not_space} = ${Char::Elatin1::not_space};
469							${Char::Elatin1::not_upper} = ${Char::Elatin1::not_upper};
470							${Char::Elatin1::not_upper_i} = ${Char::Elatin1::not_upper_i};
471							${Char::Elatin1::not_word} = ${Char::Elatin1::not_word};
472							${Char::Elatin1::not_xdigit} = ${Char::Elatin1::not_xdigit};
473							${Char::Elatin1::eb} = ${Char::Elatin1::eb};
474							${Char::Elatin1::eB} = ${Char::Elatin1::eB};
475
476							#
477							# Latin-1 split
478							#
479							sub Char::Elatin1::split(;$$$) {
480
481							# P.794 29.2.161. split
482							# in Chapter 29: Functions
483							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
484
485							# P.951 split
486							# in Chapter 27: Functions
487							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
488
489	0			0	0	0	my $pattern = $_[0];
490	0					0	my $string = $_[1];
491	0					0	my $limit = $_[2];
492
493							# if $pattern is also omitted or is the literal space, " "
494	0	0				0	if (not defined $pattern) {
495	0					0	$pattern = ' ';
496							}
497
498							# if $string is omitted, the function splits the $_ string
499	0	0				0	if (not defined $string) {
500	0	0				0	if (defined $_) {
501	0					0	$string = $_;
502							}
503							else {
504	0					0	$string = '';
505							}
506							}
507
508	0					0	my @split = ();
509
510							# when string is empty
511	0	0				0	if ($string eq '') {
		0
512
513							# resulting list value in list context
514	0	0				0	if (wantarray) {
515	0					0	return @split;
516							}
517
518							# count of substrings in scalar context
519							else {
520	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
521	0					0	@_ = @split;
522	0					0	return scalar @_;
523							}
524							}
525
526							# split's first argument is more consistently interpreted
527							#
528							# After some changes earlier in v5.17, split's behavior has been simplified:
529							# if the PATTERN argument evaluates to a string containing one space, it is
530							# treated the way that a literal string containing one space once was.
531							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
532
533							# if $pattern is also omitted or is the literal space, " ", the function splits
534							# on whitespace, /\s+/, after skipping any leading whitespace
535							# (and so on)
536
537							elsif ($pattern eq ' ') {
538	0	0				0	if (not defined $limit) {
539	0					0	return CORE::split(' ', $string);
540							}
541							else {
542	0					0	return CORE::split(' ', $string, $limit);
543							}
544							}
545
546							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
547	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
548
549							# a pattern capable of matching either the null string or something longer than the
550							# null string will split the value of $string into separate characters wherever it
551							# matches the null string between characters
552							# (and so on)
553
554	0	0				0	if ('' =~ / \A $pattern \z /xms) {
555	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
556	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
557
558							# P.1024 Appendix W.10 Multibyte Processing
559							# of ISBN 1-56592-224-7 CJKV Information Processing
560							# (and so on)
561
562							# the //m modifier is assumed when you split on the pattern /^/
563							# (and so on)
564
565							# V
566	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
567
568							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
569							# is included in the resulting list, interspersed with the fields that are ordinarily returned
570							# (and so on)
571
572	0					0	local $@;
573	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
574	0					0	push @split, eval('$' . $digit);
575							}
576							}
577							}
578
579							else {
580	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
581
582							# V
583	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
584	0					0	local $@;
585	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
586	0					0	push @split, eval('$' . $digit);
587							}
588							}
589							}
590							}
591
592							elsif ($limit > 0) {
593	0	0				0	if ('' =~ / \A $pattern \z /xms) {
594	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
595	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
596
597							# V
598	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
599	0					0	local $@;
600	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
601	0					0	push @split, eval('$' . $digit);
602							}
603							}
604							}
605							}
606							else {
607	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
608	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
609
610							# V
611	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
612	0					0	local $@;
613	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
614	0					0	push @split, eval('$' . $digit);
615							}
616							}
617							}
618							}
619							}
620
621	0	0				0	if (CORE::length($string) > 0) {
622	0					0	push @split, $string;
623							}
624
625							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
626	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
627	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
628	0					0	pop @split;
629							}
630							}
631
632							# resulting list value in list context
633	0	0				0	if (wantarray) {
634	0					0	return @split;
635							}
636
637							# count of substrings in scalar context
638							else {
639	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
640	0					0	@_ = @split;
641	0					0	return scalar @_;
642							}
643							}
644
645							#
646							# get last subexpression offsets
647							#
648							sub _last_subexpression_offsets {
649	0			0		0	my $pattern = $_[0];
650
651							# remove comment
652	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
653
654	0					0	my $modifier = '';
655	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
656	0					0	$modifier = $1;
657	0					0	$modifier =~ s/-[A-Za-z]*//;
658							}
659
660							# with /x modifier
661	0					0	my @char = ();
662	0	0				0	if ($modifier =~ /x/oxms) {
663	0					0	@char = $pattern =~ /\G(
664							\\ (?:$q_char) \|
665							\# (?:$q_char)*? $ \|
666							\[ (?: \\\] \| (?:$q_char))+? \] \|
667							\(\? \|
668							(?:$q_char)
669							)/oxmsg;
670							}
671
672							# without /x modifier
673							else {
674	0					0	@char = $pattern =~ /\G(
675							\\ (?:$q_char) \|
676							\[ (?: \\\] \| (?:$q_char))+? \] \|
677							\(\? \|
678							(?:$q_char)
679							)/oxmsg;
680							}
681
682	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
683							}
684
685							#
686							# Latin-1 transliteration (tr///)
687							#
688							sub Char::Elatin1::tr($$$$;$) {
689
690	0			0	0	0	my $bind_operator = $_[1];
691	0					0	my $searchlist = $_[2];
692	0					0	my $replacementlist = $_[3];
693	0		0			0	my $modifier = $_[4] \|\| '';
694
695	0	0				0	if ($modifier =~ /r/oxms) {
696	0	0				0	if ($bind_operator =~ / !~ /oxms) {
697	0					0	croak "Using !~ with tr///r doesn't make sense";
698							}
699							}
700
701	0					0	my @char = $_[0] =~ /\G ($q_char) /oxmsg;
702	0					0	my @searchlist = _charlist_tr($searchlist);
703	0					0	my @replacementlist = _charlist_tr($replacementlist);
704
705	0					0	my %tr = ();
706	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
707	0	0				0	if (not exists $tr{$searchlist[$i]}) {
708	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
709	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
710							}
711							elsif ($modifier =~ /d/oxms) {
712	0					0	$tr{$searchlist[$i]} = '';
713							}
714							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
715	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
716							}
717							else {
718	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
719							}
720							}
721							}
722
723	0					0	my $tr = 0;
724	0					0	my $replaced = '';
725	0	0				0	if ($modifier =~ /c/oxms) {
726	0					0	while (defined(my $char = shift @char)) {
727	0	0				0	if (not exists $tr{$char}) {
728	0	0				0	if (defined $replacementlist[0]) {
729	0					0	$replaced .= $replacementlist[0];
730							}
731	0					0	$tr++;
732	0	0				0	if ($modifier =~ /s/oxms) {
733	0		0			0	while (@char and (not exists $tr{$char[0]})) {
734	0					0	shift @char;
735	0					0	$tr++;
736							}
737							}
738							}
739							else {
740	0					0	$replaced .= $char;
741							}
742							}
743							}
744							else {
745	0					0	while (defined(my $char = shift @char)) {
746	0	0				0	if (exists $tr{$char}) {
747	0					0	$replaced .= $tr{$char};
748	0					0	$tr++;
749	0	0				0	if ($modifier =~ /s/oxms) {
750	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
751	0					0	shift @char;
752	0					0	$tr++;
753							}
754							}
755							}
756							else {
757	0					0	$replaced .= $char;
758							}
759							}
760							}
761
762	0	0				0	if ($modifier =~ /r/oxms) {
763	0					0	return $replaced;
764							}
765							else {
766	0					0	$_[0] = $replaced;
767	0	0				0	if ($bind_operator =~ / !~ /oxms) {
768	0					0	return not $tr;
769							}
770							else {
771	0					0	return $tr;
772							}
773							}
774							}
775
776							#
777							# Latin-1 chop
778							#
779							sub Char::Elatin1::chop(@) {
780
781	0			0	0	0	my $chop;
782	0	0				0	if (@_ == 0) {
783	0					0	my @char = /\G ($q_char) /oxmsg;
784	0					0	$chop = pop @char;
785	0					0	$_ = join '', @char;
786							}
787							else {
788	0					0	for (@_) {
789	0					0	my @char = /\G ($q_char) /oxmsg;
790	0					0	$chop = pop @char;
791	0					0	$_ = join '', @char;
792							}
793							}
794	0					0	return $chop;
795							}
796
797							#
798							# Latin-1 index by octet
799							#
800							sub Char::Elatin1::index($$;$) {
801
802	0			0	1	0	my($str,$substr,$position) = @_;
803	0		0			0	$position \|\|= 0;
804	0					0	my $pos = 0;
805
806	0					0	while ($pos < CORE::length($str)) {
807	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
808	0	0				0	if ($pos >= $position) {
809	0					0	return $pos;
810							}
811							}
812	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
813	0					0	$pos += CORE::length($1);
814							}
815							else {
816	0					0	$pos += 1;
817							}
818							}
819	0					0	return -1;
820							}
821
822							#
823							# Latin-1 reverse index
824							#
825							sub Char::Elatin1::rindex($$;$) {
826
827	0			0	0	0	my($str,$substr,$position) = @_;
828	0		0			0	$position \|\|= CORE::length($str) - 1;
829	0					0	my $pos = 0;
830	0					0	my $rindex = -1;
831
832	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
833	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
834	0					0	$rindex = $pos;
835							}
836	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
837	0					0	$pos += CORE::length($1);
838							}
839							else {
840	0					0	$pos += 1;
841							}
842							}
843	0					0	return $rindex;
844							}
845
846							#
847							# Latin-1 lower case first with parameter
848							#
849							sub Char::Elatin1::lcfirst(@) {
850	0	0		0	0	0	if (@_) {
851	0					0	my $s = shift @_;
852	0	0	0			0	if (@_ and wantarray) {
853	0					0	return Char::Elatin1::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
854							}
855							else {
856	0					0	return Char::Elatin1::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
857							}
858							}
859							else {
860	0					0	return Char::Elatin1::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
861							}
862							}
863
864							#
865							# Latin-1 lower case first without parameter
866							#
867							sub Char::Elatin1::lcfirst_() {
868	0			0	0	0	return Char::Elatin1::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
869							}
870
871							#
872							# Latin-1 lower case with parameter
873							#
874							sub Char::Elatin1::lc(@) {
875	0	0		0	0	0	if (@_) {
876	0					0	my $s = shift @_;
877	0	0	0			0	if (@_ and wantarray) {
878	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
879							}
880							else {
881	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
882							}
883							}
884							else {
885	0					0	return Char::Elatin1::lc_();
886							}
887							}
888
889							#
890							# Latin-1 lower case without parameter
891							#
892							sub Char::Elatin1::lc_() {
893	0			0	0	0	my $s = $_;
894	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
895							}
896
897							#
898							# Latin-1 upper case first with parameter
899							#
900							sub Char::Elatin1::ucfirst(@) {
901	0	0		0	0	0	if (@_) {
902	0					0	my $s = shift @_;
903	0	0	0			0	if (@_ and wantarray) {
904	0					0	return Char::Elatin1::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
905							}
906							else {
907	0					0	return Char::Elatin1::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
908							}
909							}
910							else {
911	0					0	return Char::Elatin1::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
912							}
913							}
914
915							#
916							# Latin-1 upper case first without parameter
917							#
918							sub Char::Elatin1::ucfirst_() {
919	0			0	0	0	return Char::Elatin1::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
920							}
921
922							#
923							# Latin-1 upper case with parameter
924							#
925							sub Char::Elatin1::uc(@) {
926	0	0		0	0	0	if (@_) {
927	0					0	my $s = shift @_;
928	0	0	0			0	if (@_ and wantarray) {
929	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
930							}
931							else {
932	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
933							}
934							}
935							else {
936	0					0	return Char::Elatin1::uc_();
937							}
938							}
939
940							#
941							# Latin-1 upper case without parameter
942							#
943							sub Char::Elatin1::uc_() {
944	0			0	0	0	my $s = $_;
945	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
946							}
947
948							#
949							# Latin-1 fold case with parameter
950							#
951							sub Char::Elatin1::fc(@) {
952	0	0		0	0	0	if (@_) {
953	0					0	my $s = shift @_;
954	0	0	0			0	if (@_ and wantarray) {
955	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
956							}
957							else {
958	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
959							}
960							}
961							else {
962	0					0	return Char::Elatin1::fc_();
963							}
964							}
965
966							#
967							# Latin-1 fold case without parameter
968							#
969							sub Char::Elatin1::fc_() {
970	0			0	0	0	my $s = $_;
971	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
972							}
973
974							#
975							# Latin-1 regexp capture
976							#
977							{
978							sub Char::Elatin1::capture {
979	0			0	1	0	return $_[0];
980							}
981							}
982
983							#
984							# Latin-1 regexp ignore case modifier
985							#
986							sub Char::Elatin1::ignorecase {
987
988	0			0	0	0	my @string = @_;
989	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
990
991							# ignore case of $scalar or @array
992	0					0	for my $string (@string) {
993
994							# split regexp
995	0					0	my @char = $string =~ /\G(
996							\[\^ \|
997							\\? (?:$q_char)
998							)/oxmsg;
999
1000							# unescape character
1001	0					0	for (my $i=0; $i <= $#char; $i++) {
1002	0	0				0	next if not defined $char[$i];
1003
1004							# open character class [...]
1005	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
1006	0					0	my $left = $i;
1007
1008							# [] make die "unmatched [] in regexp ..."
1009
1010	0	0				0	if ($char[$i+1] eq ']') {
1011	0					0	$i++;
1012							}
1013
1014	0					0	while (1) {
1015	0	0				0	if (++$i > $#char) {
1016	0					0	croak "Unmatched [] in regexp";
1017							}
1018	0	0				0	if ($char[$i] eq ']') {
1019	0					0	my $right = $i;
1020	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
1021
1022							# escape character
1023	0					0	for my $char (@charlist) {
1024	0	0				0	if (0) {
1025							}
1026
1027	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1028	0					0	$char = $1 . '\\' . $char;
1029							}
1030							}
1031
1032							# [...]
1033	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
1034
1035	0					0	$i = $left;
1036	0					0	last;
1037							}
1038							}
1039							}
1040
1041							# open character class [^...]
1042							elsif ($char[$i] eq '[^') {
1043	0					0	my $left = $i;
1044
1045							# [^] make die "unmatched [] in regexp ..."
1046
1047	0	0				0	if ($char[$i+1] eq ']') {
1048	0					0	$i++;
1049							}
1050
1051	0					0	while (1) {
1052	0	0				0	if (++$i > $#char) {
1053	0					0	croak "Unmatched [] in regexp";
1054							}
1055	0	0				0	if ($char[$i] eq ']') {
1056	0					0	my $right = $i;
1057	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
1058
1059							# escape character
1060	0					0	for my $char (@charlist) {
1061	0	0				0	if (0) {
1062							}
1063
1064	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1065	0					0	$char = '\\' . $char;
1066							}
1067							}
1068
1069							# [^...]
1070	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
1071
1072	0					0	$i = $left;
1073	0					0	last;
1074							}
1075							}
1076							}
1077
1078							# rewrite classic character class or escape character
1079							elsif (my $char = classic_character_class($char[$i])) {
1080	0					0	$char[$i] = $char;
1081							}
1082
1083							# with /i modifier
1084							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1085	0					0	my $uc = Char::Elatin1::uc($char[$i]);
1086	0					0	my $fc = Char::Elatin1::fc($char[$i]);
1087	0	0				0	if ($uc ne $fc) {
1088	0	0				0	if (CORE::length($fc) == 1) {
1089	0					0	$char[$i] = '[' . $uc . $fc . ']';
1090							}
1091							else {
1092	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
1093							}
1094							}
1095							}
1096							}
1097
1098							# characterize
1099	0					0	for (my $i=0; $i <= $#char; $i++) {
1100	0	0				0	next if not defined $char[$i];
1101
1102	0	0				0	if (0) {
1103							}
1104
1105							# quote character before ? + * {
1106	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
1107	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
1108	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
1109							}
1110							}
1111							}
1112
1113	0					0	$string = join '', @char;
1114							}
1115
1116							# make regexp string
1117	0					0	return @string;
1118							}
1119
1120							#
1121							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
1122							#
1123							sub Char::Elatin1::classic_character_class {
1124	0			0	0	0	my($char) = @_;
1125
1126							return {
1127	0		0			0	'\D' => '${Char::Elatin1::eD}',
1128							'\S' => '${Char::Elatin1::eS}',
1129							'\W' => '${Char::Elatin1::eW}',
1130							'\d' => '[0-9]',
1131
1132							# Before Perl 5.6, \s only matched the five whitespace characters
1133							# tab, newline, form-feed, carriage return, and the space character
1134							# itself, which, taken together, is the character class [\t\n\f\r ].
1135
1136							# Vertical tabs are now whitespace
1137							# \s in a regex now matches a vertical tab in all circumstances.
1138							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1139							# \t \n \v \f \r space
1140							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1141							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1142							'\s' => '\s',
1143
1144							'\w' => '[0-9A-Z_a-z]',
1145							'\C' => '[\x00-\xFF]',
1146							'\X' => 'X',
1147
1148							# \h \v \H \V
1149
1150							# P.114 Character Class Shortcuts
1151							# in Chapter 7: In the World of Regular Expressions
1152							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
1153
1154							# P.357 13.2.3 Whitespace
1155							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
1156							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1157							#
1158							# 0x00009 CHARACTER TABULATION h s
1159							# 0x0000a LINE FEED (LF) vs
1160							# 0x0000b LINE TABULATION v
1161							# 0x0000c FORM FEED (FF) vs
1162							# 0x0000d CARRIAGE RETURN (CR) vs
1163							# 0x00020 SPACE h s
1164
1165							# P.196 Table 5-9. Alphanumeric regex metasymbols
1166							# in Chapter 5. Pattern Matching
1167							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1168
1169							# (and so on)
1170
1171							'\H' => '${Char::Elatin1::eH}',
1172							'\V' => '${Char::Elatin1::eV}',
1173							'\h' => '[\x09\x20]',
1174							'\v' => '[\x0A\x0B\x0C\x0D]',
1175							'\R' => '${Char::Elatin1::eR}',
1176
1177							# \N
1178							#
1179							# http://perldoc.perl.org/perlre.html
1180							# Character Classes and other Special Escapes
1181							# Any character but \n (experimental). Not affected by /s modifier
1182
1183							'\N' => '${Char::Elatin1::eN}',
1184
1185							# \b \B
1186
1187							# P.180 Boundaries: The \b and \B Assertions
1188							# in Chapter 5: Pattern Matching
1189							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1190
1191							# P.219 Boundaries: The \b and \B Assertions
1192							# in Chapter 5: Pattern Matching
1193							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1194
1195							# '\b' => '(?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))',
1196							'\b' => '${Char::Elatin1::eb}',
1197
1198							# '\B' => '(?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))',
1199							'\B' => '${Char::Elatin1::eB}',
1200
1201							}->{$char} \|\| '';
1202							}
1203
1204							#
1205							# prepare Latin-1 characters per length
1206							#
1207
1208							# 1 octet characters
1209							my @chars1 = ();
1210							sub chars1 {
1211	0	0		0	0	0	if (@chars1) {
1212	0					0	return @chars1;
1213							}
1214	0	0				0	if (exists $range_tr{1}) {
1215	0					0	my @ranges = @{ $range_tr{1} };
	0					0
1216	0					0	while (my @range = splice(@ranges,0,1)) {
1217	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1218	0					0	push @chars1, pack 'C', $oct0;
1219							}
1220							}
1221							}
1222	0					0	return @chars1;
1223							}
1224
1225							# 2 octets characters
1226							my @chars2 = ();
1227							sub chars2 {
1228	0	0		0	0	0	if (@chars2) {
1229	0					0	return @chars2;
1230							}
1231	0	0				0	if (exists $range_tr{2}) {
1232	0					0	my @ranges = @{ $range_tr{2} };
	0					0
1233	0					0	while (my @range = splice(@ranges,0,2)) {
1234	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1235	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1236	0					0	push @chars2, pack 'CC', $oct0,$oct1;
1237							}
1238							}
1239							}
1240							}
1241	0					0	return @chars2;
1242							}
1243
1244							# 3 octets characters
1245							my @chars3 = ();
1246							sub chars3 {
1247	0	0		0	0	0	if (@chars3) {
1248	0					0	return @chars3;
1249							}
1250	0	0				0	if (exists $range_tr{3}) {
1251	0					0	my @ranges = @{ $range_tr{3} };
	0					0
1252	0					0	while (my @range = splice(@ranges,0,3)) {
1253	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1254	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1255	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1256	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
1257							}
1258							}
1259							}
1260							}
1261							}
1262	0					0	return @chars3;
1263							}
1264
1265							# 4 octets characters
1266							my @chars4 = ();
1267							sub chars4 {
1268	0	0		0	0	0	if (@chars4) {
1269	0					0	return @chars4;
1270							}
1271	0	0				0	if (exists $range_tr{4}) {
1272	0					0	my @ranges = @{ $range_tr{4} };
	0					0
1273	0					0	while (my @range = splice(@ranges,0,4)) {
1274	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1275	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1276	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1277	0					0	for my $oct3 (@{$range[3]}) {
	0					0
1278	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
1279							}
1280							}
1281							}
1282							}
1283							}
1284							}
1285	0					0	return @chars4;
1286							}
1287
1288							#
1289							# Latin-1 open character list for tr
1290							#
1291							sub _charlist_tr {
1292
1293	0			0		0	local $_ = shift @_;
1294
1295							# unescape character
1296	0					0	my @char = ();
1297	0					0	while (not /\G \z/oxmsgc) {
1298	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
1299	0					0	push @char, '\-';
1300							}
1301							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
1302	0					0	push @char, CORE::chr(oct $1);
1303							}
1304							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
1305	0					0	push @char, CORE::chr(hex $1);
1306							}
1307							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
1308	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
1309							}
1310							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
1311	0					0	push @char, {
1312							'\0' => "\0",
1313							'\n' => "\n",
1314							'\r' => "\r",
1315							'\t' => "\t",
1316							'\f' => "\f",
1317							'\b' => "\x08", # \b means backspace in character class
1318							'\a' => "\a",
1319							'\e' => "\e",
1320							}->{$1};
1321							}
1322							elsif (/\G \\ ($q_char) /oxmsgc) {
1323	0					0	push @char, $1;
1324							}
1325							elsif (/\G ($q_char) /oxmsgc) {
1326	0					0	push @char, $1;
1327							}
1328							}
1329
1330							# join separated multiple-octet
1331	0					0	@char = join('',@char) =~ /\G (\\-\|$q_char) /oxmsg;
1332
1333							# unescape '-'
1334	0					0	my @i = ();
1335	0					0	for my $i (0 .. $#char) {
1336	0	0				0	if ($char[$i] eq '\-') {
		0
1337	0					0	$char[$i] = '-';
1338							}
1339							elsif ($char[$i] eq '-') {
1340	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1341	0					0	push @i, $i;
1342							}
1343							}
1344							}
1345
1346							# open character list (reverse for splice)
1347	0					0	for my $i (CORE::reverse @i) {
1348	0					0	my @range = ();
1349
1350							# range error
1351	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
1352	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1353							}
1354
1355							# range of multiple-octet code
1356	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
1357	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
1358	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
1359							}
1360							elsif (CORE::length($char[$i+1]) == 2) {
1361	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1362	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
1363							}
1364							elsif (CORE::length($char[$i+1]) == 3) {
1365	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1366	0					0	push @range, chars2();
1367	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1368							}
1369							elsif (CORE::length($char[$i+1]) == 4) {
1370	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1371	0					0	push @range, chars2();
1372	0					0	push @range, chars3();
1373	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1374							}
1375							else {
1376	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1377							}
1378							}
1379							elsif (CORE::length($char[$i-1]) == 2) {
1380	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
1381	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
1382							}
1383							elsif (CORE::length($char[$i+1]) == 3) {
1384	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1385	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1386							}
1387							elsif (CORE::length($char[$i+1]) == 4) {
1388	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1389	0					0	push @range, chars3();
1390	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1391							}
1392							else {
1393	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1394							}
1395							}
1396							elsif (CORE::length($char[$i-1]) == 3) {
1397	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
1398	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
1399							}
1400							elsif (CORE::length($char[$i+1]) == 4) {
1401	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
1402	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1403							}
1404							else {
1405	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1406							}
1407							}
1408							elsif (CORE::length($char[$i-1]) == 4) {
1409	0	0				0	if (CORE::length($char[$i+1]) == 4) {
1410	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
1411							}
1412							else {
1413	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1414							}
1415							}
1416							else {
1417	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1418							}
1419
1420	0					0	splice @char, $i-1, 3, @range;
1421							}
1422
1423	0					0	return @char;
1424							}
1425
1426							#
1427							# Latin-1 open character class
1428							#
1429							sub _cc {
1430	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
1431	0					0	die __FILE__, ": subroutine cc got no parameter.";
1432							}
1433							elsif (scalar(@_) == 1) {
1434	0					0	return sprintf('\x%02X',$_[0]);
1435							}
1436							elsif (scalar(@_) == 2) {
1437	0	0				0	if ($_[0] > $_[1]) {
		0
		0
1438	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).";
1439							}
1440							elsif ($_[0] == $_[1]) {
1441	0					0	return sprintf('\x%02X',$_[0]);
1442							}
1443							elsif (($_[0]+1) == $_[1]) {
1444	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
1445							}
1446							else {
1447	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
1448							}
1449							}
1450							else {
1451	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).";
	0					0
1452							}
1453							}
1454
1455							#
1456							# Latin-1 octet range
1457							#
1458							sub _octets {
1459	0			0		0	my $length = shift @_;
1460
1461	0	0				0	if ($length == 1) {
1462	0					0	my($a1) = unpack 'C', $_[0];
1463	0					0	my($z1) = unpack 'C', $_[1];
1464
1465	0	0				0	if ($a1 > $z1) {
1466	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
1467							}
1468
1469	0	0				0	if ($a1 == $z1) {
		0
1470	0					0	return sprintf('\x%02X',$a1);
1471							}
1472							elsif (($a1+1) == $z1) {
1473	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
1474							}
1475							else {
1476	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
1477							}
1478							}
1479							else {
1480	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).";
1481							}
1482							}
1483
1484							#
1485							# Latin-1 range regexp
1486							#
1487							sub _range_regexp {
1488	0			0		0	my($length,$first,$last) = @_;
1489
1490	0					0	my @range_regexp = ();
1491	0	0				0	if (not exists $range_tr{$length}) {
1492	0					0	return @range_regexp;
1493							}
1494
1495	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
1496	0					0	while (my @range = splice(@ranges,0,$length)) {
1497	0					0	my $min = '';
1498	0					0	my $max = '';
1499	0					0	for (my $i=0; $i < $length; $i++) {
1500	0					0	$min .= pack 'C', $range[$i][0];
1501	0					0	$max .= pack 'C', $range[$i][-1];
1502							}
1503
1504							# min___max
1505							# FIRST_____________LAST
1506							# (nothing)
1507
1508	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
1509							}
1510
1511							# **********
1512							# min_________max
1513							# FIRST_____________LAST
1514							# **********
1515
1516							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
1517	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
1518							}
1519
1520							# **********************
1521							# min________________max
1522							# FIRST_____________LAST
1523							# **********************
1524
1525							elsif (($min eq $first) and ($max eq $last)) {
1526	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1527							}
1528
1529							# *********
1530							# min___max
1531							# FIRST_____________LAST
1532							# *********
1533
1534							elsif (($first le $min) and ($max le $last)) {
1535	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
1536							}
1537
1538							# **********************
1539							# min__________________________max
1540							# FIRST_____________LAST
1541							# **********************
1542
1543							elsif (($min le $first) and ($last le $max)) {
1544	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1545							}
1546
1547							# *********
1548							# min________max
1549							# FIRST_____________LAST
1550							# *********
1551
1552							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
1553	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
1554							}
1555
1556							# min___max
1557							# FIRST_____________LAST
1558							# (nothing)
1559
1560							elsif ($last lt $min) {
1561							}
1562
1563							else {
1564	0					0	die __FILE__, ": subroutine _range_regexp panic.";
1565							}
1566							}
1567
1568	0					0	return @range_regexp;
1569							}
1570
1571							#
1572							# Latin-1 open character list for qr and not qr
1573							#
1574							sub _charlist {
1575
1576	0			0		0	my $modifier = pop @_;
1577	0					0	my @char = @_;
1578
1579	0	0				0	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
1580
1581							# unescape character
1582	0					0	for (my $i=0; $i <= $#char; $i++) {
1583
1584							# escape - to ...
1585	0	0	0			0	if ($char[$i] eq '-') {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
1586	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1587	0					0	$char[$i] = '...';
1588							}
1589							}
1590
1591							# octal escape sequence
1592							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
1593	0					0	$char[$i] = octchr($1);
1594							}
1595
1596							# hexadecimal escape sequence
1597							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
1598	0					0	$char[$i] = hexchr($1);
1599							}
1600
1601							# \N{CHARNAME} --> N\{CHARNAME}
1602							elsif ($char[$i] =~ /\A \\ (N) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1603	0					0	$char[$i] = $1 . '\\' . $2;
1604							}
1605
1606							# \p{PROPERTY} --> p\{PROPERTY}
1607							elsif ($char[$i] =~ /\A \\ (p) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1608	0					0	$char[$i] = $1 . '\\' . $2;
1609							}
1610
1611							# \P{PROPERTY} --> P\{PROPERTY}
1612							elsif ($char[$i] =~ /\A \\ (P) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1613	0					0	$char[$i] = $1 . '\\' . $2;
1614							}
1615
1616							# \p, \P, \X --> p, P, X
1617							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
1618	0					0	$char[$i] = $1;
1619							}
1620
1621							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
1622	0					0	$char[$i] = CORE::chr oct $1;
1623							}
1624							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
1625	0					0	$char[$i] = CORE::chr hex $1;
1626							}
1627							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
1628	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
1629							}
1630							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
1631	0					0	$char[$i] = {
1632							'\0' => "\0",
1633							'\n' => "\n",
1634							'\r' => "\r",
1635							'\t' => "\t",
1636							'\f' => "\f",
1637							'\b' => "\x08", # \b means backspace in character class
1638							'\a' => "\a",
1639							'\e' => "\e",
1640							'\d' => '[0-9]',
1641
1642							# Vertical tabs are now whitespace
1643							# \s in a regex now matches a vertical tab in all circumstances.
1644							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1645							# \t \n \v \f \r space
1646							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1647							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1648							'\s' => '\s',
1649
1650							'\w' => '[0-9A-Z_a-z]',
1651							'\D' => '${Char::Elatin1::eD}',
1652							'\S' => '${Char::Elatin1::eS}',
1653							'\W' => '${Char::Elatin1::eW}',
1654
1655							'\H' => '${Char::Elatin1::eH}',
1656							'\V' => '${Char::Elatin1::eV}',
1657							'\h' => '[\x09\x20]',
1658							'\v' => '[\x0A\x0B\x0C\x0D]',
1659							'\R' => '${Char::Elatin1::eR}',
1660
1661							}->{$1};
1662							}
1663
1664							# POSIX-style character classes
1665							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
1666	0					0	$char[$i] = {
1667
1668							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
1669							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
1670							'[:^lower:]' => '${Char::Elatin1::not_lower_i}',
1671							'[:^upper:]' => '${Char::Elatin1::not_upper_i}',
1672
1673							}->{$1};
1674							}
1675							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
1676	0					0	$char[$i] = {
1677
1678							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
1679							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
1680							'[:ascii:]' => '[\x00-\x7F]',
1681							'[:blank:]' => '[\x09\x20]',
1682							'[:cntrl:]' => '[\x00-\x1F\x7F]',
1683							'[:digit:]' => '[\x30-\x39]',
1684							'[:graph:]' => '[\x21-\x7F]',
1685							'[:lower:]' => '[\x61-\x7A]',
1686							'[:print:]' => '[\x20-\x7F]',
1687							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
1688
1689							# P.174 POSIX-Style Character Classes
1690							# in Chapter 5: Pattern Matching
1691							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1692
1693							# P.311 11.2.4 Character Classes and other Special Escapes
1694							# in Chapter 11: perlre: Perl regular expressions
1695							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1696
1697							# P.210 POSIX-Style Character Classes
1698							# in Chapter 5: Pattern Matching
1699							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1700
1701							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
1702
1703							'[:upper:]' => '[\x41-\x5A]',
1704							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
1705							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
1706							'[:^alnum:]' => '${Char::Elatin1::not_alnum}',
1707							'[:^alpha:]' => '${Char::Elatin1::not_alpha}',
1708							'[:^ascii:]' => '${Char::Elatin1::not_ascii}',
1709							'[:^blank:]' => '${Char::Elatin1::not_blank}',
1710							'[:^cntrl:]' => '${Char::Elatin1::not_cntrl}',
1711							'[:^digit:]' => '${Char::Elatin1::not_digit}',
1712							'[:^graph:]' => '${Char::Elatin1::not_graph}',
1713							'[:^lower:]' => '${Char::Elatin1::not_lower}',
1714							'[:^print:]' => '${Char::Elatin1::not_print}',
1715							'[:^punct:]' => '${Char::Elatin1::not_punct}',
1716							'[:^space:]' => '${Char::Elatin1::not_space}',
1717							'[:^upper:]' => '${Char::Elatin1::not_upper}',
1718							'[:^word:]' => '${Char::Elatin1::not_word}',
1719							'[:^xdigit:]' => '${Char::Elatin1::not_xdigit}',
1720
1721							}->{$1};
1722							}
1723							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
1724	0					0	$char[$i] = $1;
1725							}
1726							}
1727
1728							# open character list
1729	0					0	my @singleoctet = ();
1730	0					0	my @multipleoctet = ();
1731	0					0	for (my $i=0; $i <= $#char; ) {
1732
1733							# escaped -
1734	0	0	0			0	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		0
		0
		0
		0
		0
1735	0					0	$i += 1;
1736	0					0	next;
1737							}
1738
1739							# make range regexp
1740							elsif ($char[$i] eq '...') {
1741
1742							# range error
1743	0	0				0	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		0
1744	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1745							}
1746							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
1747	0	0				0	if ($char[$i-1] gt $char[$i+1]) {
1748	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1749							}
1750							}
1751
1752							# make range regexp per length
1753	0					0	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
1754	0					0	my @regexp = ();
1755
1756							# is first and last
1757	0	0	0			0	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		0	0
		0
		0
1758	0					0	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
1759							}
1760
1761							# is first
1762							elsif ($length == CORE::length($char[$i-1])) {
1763	0					0	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
1764							}
1765
1766							# is inside in first and last
1767							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
1768	0					0	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
1769							}
1770
1771							# is last
1772							elsif ($length == CORE::length($char[$i+1])) {
1773	0					0	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
1774							}
1775
1776							else {
1777	0					0	die __FILE__, ": subroutine make_regexp panic.";
1778							}
1779
1780	0	0				0	if ($length == 1) {
1781	0					0	push @singleoctet, @regexp;
1782							}
1783							else {
1784	0					0	push @multipleoctet, @regexp;
1785							}
1786							}
1787
1788	0					0	$i += 2;
1789							}
1790
1791							# with /i modifier
1792							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1793	0	0				0	if ($modifier =~ /i/oxms) {
1794	0					0	my $uc = Char::Elatin1::uc($char[$i]);
1795	0					0	my $fc = Char::Elatin1::fc($char[$i]);
1796	0	0				0	if ($uc ne $fc) {
1797	0	0				0	if (CORE::length($fc) == 1) {
1798	0					0	push @singleoctet, $uc, $fc;
1799							}
1800							else {
1801	0					0	push @singleoctet, $uc;
1802	0					0	push @multipleoctet, $fc;
1803							}
1804							}
1805							else {
1806	0					0	push @singleoctet, $char[$i];
1807							}
1808							}
1809							else {
1810	0					0	push @singleoctet, $char[$i];
1811							}
1812	0					0	$i += 1;
1813							}
1814
1815							# single character of single octet code
1816							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
1817	0					0	push @singleoctet, "\t", "\x20";
1818	0					0	$i += 1;
1819							}
1820							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
1821	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
1822	0					0	$i += 1;
1823							}
1824							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
1825	0					0	push @singleoctet, $char[$i];
1826	0					0	$i += 1;
1827							}
1828
1829							# single character of multiple-octet code
1830							else {
1831	0					0	push @multipleoctet, $char[$i];
1832	0					0	$i += 1;
1833							}
1834							}
1835
1836							# quote metachar
1837	0					0	for (@singleoctet) {
1838	0	0				0	if ($_ eq '...') {
		0
		0
		0
		0
1839	0					0	$_ = '-';
1840							}
1841							elsif (/\A \n \z/oxms) {
1842	0					0	$_ = '\n';
1843							}
1844							elsif (/\A \r \z/oxms) {
1845	0					0	$_ = '\r';
1846							}
1847							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
1848	0					0	$_ = sprintf('\x%02X', CORE::ord $1);
1849							}
1850							elsif (/\A [\x00-\xFF] \z/oxms) {
1851	0					0	$_ = quotemeta $_;
1852							}
1853							}
1854
1855							# return character list
1856	0					0	return \@singleoctet, \@multipleoctet;
1857							}
1858
1859							#
1860							# Latin-1 octal escape sequence
1861							#
1862							sub octchr {
1863	0			0	0	0	my($octdigit) = @_;
1864
1865	0					0	my @binary = ();
1866	0					0	for my $octal (split(//,$octdigit)) {
1867	0					0	push @binary, {
1868							'0' => '000',
1869							'1' => '001',
1870							'2' => '010',
1871							'3' => '011',
1872							'4' => '100',
1873							'5' => '101',
1874							'6' => '110',
1875							'7' => '111',
1876							}->{$octal};
1877							}
1878	0					0	my $binary = join '', @binary;
1879
1880	0					0	my $octchr = {
1881							# 1234567
1882							1 => pack('B*', "0000000$binary"),
1883							2 => pack('B*', "000000$binary"),
1884							3 => pack('B*', "00000$binary"),
1885							4 => pack('B*', "0000$binary"),
1886							5 => pack('B*', "000$binary"),
1887							6 => pack('B*', "00$binary"),
1888							7 => pack('B*', "0$binary"),
1889							0 => pack('B*', "$binary"),
1890
1891							}->{CORE::length($binary) % 8};
1892
1893	0					0	return $octchr;
1894							}
1895
1896							#
1897							# Latin-1 hexadecimal escape sequence
1898							#
1899							sub hexchr {
1900	0			0	0	0	my($hexdigit) = @_;
1901
1902	0					0	my $hexchr = {
1903							1 => pack('H*', "0$hexdigit"),
1904							0 => pack('H*', "$hexdigit"),
1905
1906							}->{CORE::length($_[0]) % 2};
1907
1908	0					0	return $hexchr;
1909							}
1910
1911							#
1912							# Latin-1 open character list for qr
1913							#
1914							sub charlist_qr {
1915
1916	0			0	0	0	my $modifier = pop @_;
1917	0					0	my @char = @_;
1918
1919	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1920	0					0	my @singleoctet = @$singleoctet;
1921	0					0	my @multipleoctet = @$multipleoctet;
1922
1923							# return character list
1924	0	0				0	if (scalar(@singleoctet) >= 1) {
1925
1926							# with /i modifier
1927	0	0				0	if ($modifier =~ m/i/oxms) {
1928	0					0	my %singleoctet_ignorecase = ();
1929	0					0	for (@singleoctet) {
1930	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
1931	0					0	for my $ord (hex($1) .. hex($2)) {
1932	0					0	my $char = CORE::chr($ord);
1933	0					0	my $uc = Char::Elatin1::uc($char);
1934	0					0	my $fc = Char::Elatin1::fc($char);
1935	0	0				0	if ($uc eq $fc) {
1936	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
1937							}
1938							else {
1939	0	0				0	if (CORE::length($fc) == 1) {
1940	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1941	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
1942							}
1943							else {
1944	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1945	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
1946							}
1947							}
1948							}
1949							}
1950	0	0				0	if ($_ ne '') {
1951	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
1952							}
1953							}
1954	0					0	my $i = 0;
1955	0					0	my @singleoctet_ignorecase = ();
1956	0					0	for my $ord (0 .. 255) {
1957	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
1958	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
1959							}
1960							else {
1961	0					0	$i++;
1962							}
1963							}
1964	0					0	@singleoctet = ();
1965	0					0	for my $range (@singleoctet_ignorecase) {
1966	0	0				0	if (ref $range) {
1967	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
1968	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
1969							}
1970							elsif (scalar(@{$range}) == 2) {
1971	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1972							}
1973							else {
1974	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1975							}
1976							}
1977							}
1978							}
1979
1980	0					0	my $not_anchor = '';
1981
1982	0					0	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
1983							}
1984	0	0				0	if (scalar(@multipleoctet) >= 2) {
1985	0					0	return '(?:' . join('\|', @multipleoctet) . ')';
1986							}
1987							else {
1988	0					0	return $multipleoctet[0];
1989							}
1990							}
1991
1992							#
1993							# Latin-1 open character list for not qr
1994							#
1995							sub charlist_not_qr {
1996
1997	0			0	0	0	my $modifier = pop @_;
1998	0					0	my @char = @_;
1999
2000	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
2001	0					0	my @singleoctet = @$singleoctet;
2002	0					0	my @multipleoctet = @$multipleoctet;
2003
2004							# with /i modifier
2005	0	0				0	if ($modifier =~ m/i/oxms) {
2006	0					0	my %singleoctet_ignorecase = ();
2007	0					0	for (@singleoctet) {
2008	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
2009	0					0	for my $ord (hex($1) .. hex($2)) {
2010	0					0	my $char = CORE::chr($ord);
2011	0					0	my $uc = Char::Elatin1::uc($char);
2012	0					0	my $fc = Char::Elatin1::fc($char);
2013	0	0				0	if ($uc eq $fc) {
2014	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
2015							}
2016							else {
2017	0	0				0	if (CORE::length($fc) == 1) {
2018	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2019	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
2020							}
2021							else {
2022	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2023	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
2024							}
2025							}
2026							}
2027							}
2028	0	0				0	if ($_ ne '') {
2029	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
2030							}
2031							}
2032	0					0	my $i = 0;
2033	0					0	my @singleoctet_ignorecase = ();
2034	0					0	for my $ord (0 .. 255) {
2035	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
2036	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
2037							}
2038							else {
2039	0					0	$i++;
2040							}
2041							}
2042	0					0	@singleoctet = ();
2043	0					0	for my $range (@singleoctet_ignorecase) {
2044	0	0				0	if (ref $range) {
2045	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
2046	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
2047							}
2048							elsif (scalar(@{$range}) == 2) {
2049	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2050							}
2051							else {
2052	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2053							}
2054							}
2055							}
2056							}
2057
2058							# return character list
2059	0	0				0	if (scalar(@multipleoctet) >= 1) {
2060	0	0				0	if (scalar(@singleoctet) >= 1) {
2061
2062							# any character other than multiple-octet and single octet character class
2063	0					0	return '(?!' . join('\|', @multipleoctet) . ')(?:[^' . join('', @singleoctet) . '])';
2064							}
2065							else {
2066
2067							# any character other than multiple-octet character class
2068	0					0	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
2069							}
2070							}
2071							else {
2072	0	0				0	if (scalar(@singleoctet) >= 1) {
2073
2074							# any character other than single octet character class
2075	0					0	return '(?:[^' . join('', @singleoctet) . '])';
2076							}
2077							else {
2078
2079							# any character
2080	0					0	return "(?:$your_char)";
2081							}
2082							}
2083							}
2084
2085							#
2086							# open file in read mode
2087							#
2088							sub _open_r {
2089	177			177		686	my(undef,$file) = @_;
2090	177					1040	$file =~ s#\A (\s) #./$1#oxms;
2091	177		33			16242	return eval(q{open($_[0],'<',$_[1])}) \|\|
2092							open($_[0],"< $file\0");
2093							}
2094
2095							#
2096							# open file in write mode
2097							#
2098							sub _open_w {
2099	0			0		0	my(undef,$file) = @_;
2100	0					0	$file =~ s#\A (\s) #./$1#oxms;
2101	0		0			0	return eval(q{open($_[0],'>',$_[1])}) \|\|
2102							open($_[0],"> $file\0");
2103							}
2104
2105							#
2106							# open file in append mode
2107							#
2108							sub _open_a {
2109	0			0		0	my(undef,$file) = @_;
2110	0					0	$file =~ s#\A (\s) #./$1#oxms;
2111	0		0			0	return eval(q{open($_[0],'>>',$_[1])}) \|\|
2112							open($_[0],">> $file\0");
2113							}
2114
2115							#
2116							# safe system
2117							#
2118							sub _systemx {
2119
2120							# P.707 29.2.33. exec
2121							# in Chapter 29: Functions
2122							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2123							#
2124							# Be aware that in older releases of Perl, exec (and system) did not flush
2125							# your output buffer, so you needed to enable command buffering by setting $\|
2126							# on one or more filehandles to avoid lost output in the case of exec, or
2127							# misordererd output in the case of system. This situation was largely remedied
2128							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
2129
2130							# P.855 exec
2131							# in Chapter 27: Functions
2132							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2133							#
2134							# In very old release of Perl (before v5.6), exec (and system) did not flush
2135							# your output buffer, so you needed to enable command buffering by setting $\|
2136							# on one or more filehandles to avoid lost output with exec or misordered
2137							# output with system.
2138
2139	177			177		712	$\| = 1;
2140
2141							# P.565 23.1.2. Cleaning Up Your Environment
2142							# in Chapter 23: Security
2143							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2144
2145							# P.656 Cleaning Up Your Environment
2146							# in Chapter 20: Security
2147							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2148
2149							# local $ENV{'PATH'} = '.';
2150	177					1853	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
2151
2152							# P.707 29.2.33. exec
2153							# in Chapter 29: Functions
2154							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2155							#
2156							# As we mentioned earlier, exec treats a discrete list of arguments as an
2157							# indication that it should bypass shell processing. However, there is one
2158							# place where you might still get tripped up. The exec call (and system, too)
2159							# will not distinguish between a single scalar argument and an array containing
2160							# only one element.
2161							#
2162							# @args = ("echo surprise"); # just one element in list
2163							# exec @args # still subject to shell escapes
2164							# or die "exec: $!"; # because @args == 1
2165							#
2166							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
2167							# first argument as the pathname, which forces the rest of the arguments to be
2168							# interpreted as a list, even if there is only one of them:
2169							#
2170							# exec { $args[0] } @args # safe even with one-argument list
2171							# or die "can't exec @args: $!";
2172
2173							# P.855 exec
2174							# in Chapter 27: Functions
2175							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2176							#
2177							# As we mentioned earlier, exec treats a discrete list of arguments as a
2178							# directive to bypass shell processing. However, there is one place where
2179							# you might still get tripped up. The exec call (and system, too) cannot
2180							# distinguish between a single scalar argument and an array containing
2181							# only one element.
2182							#
2183							# @args = ("echo surprise"); # just one element in list
2184							# exec @args # still subject to shell escapes
2185							# \|\| die "exec: $!"; # because @args == 1
2186							#
2187							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
2188							# argument as the pathname, which forces the rest of the arguments to be
2189							# interpreted as a list, even if there is only one of them:
2190							#
2191							# exec { $args[0] } @args # safe even with one-argument list
2192							# \|\| die "can't exec @args: $!";
2193
2194	177					405	return CORE::system { $_[0] } @_; # safe even with one-argument list
	177					11234625
2195							}
2196
2197							#
2198							# Latin-1 order to character (with parameter)
2199							#
2200							sub Char::Elatin1::chr(;$) {
2201
2202	0	0		0	0		my $c = @_ ? $_[0] : $_;
2203
2204	0	0					if ($c == 0x00) {
2205	0						return "\x00";
2206							}
2207							else {
2208	0						my @chr = ();
2209	0						while ($c > 0) {
2210	0						unshift @chr, ($c % 0x100);
2211	0						$c = int($c / 0x100);
2212							}
2213	0						return pack 'C*', @chr;
2214							}
2215							}
2216
2217							#
2218							# Latin-1 order to character (without parameter)
2219							#
2220							sub Char::Elatin1::chr_() {
2221
2222	0			0	0		my $c = $_;
2223
2224	0	0					if ($c == 0x00) {
2225	0						return "\x00";
2226							}
2227							else {
2228	0						my @chr = ();
2229	0						while ($c > 0) {
2230	0						unshift @chr, ($c % 0x100);
2231	0						$c = int($c / 0x100);
2232							}
2233	0						return pack 'C*', @chr;
2234							}
2235							}
2236
2237							#
2238							# Latin-1 path globbing (with parameter)
2239							#
2240							sub Char::Elatin1::glob($) {
2241
2242	0	0		0	0		if (wantarray) {
2243	0						my @glob = _DOS_like_glob(@_);
2244	0						for my $glob (@glob) {
2245	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2246							}
2247	0						return @glob;
2248							}
2249							else {
2250	0						my $glob = _DOS_like_glob(@_);
2251	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2252	0						return $glob;
2253							}
2254							}
2255
2256							#
2257							# Latin-1 path globbing (without parameter)
2258							#
2259							sub Char::Elatin1::glob_() {
2260
2261	0	0		0	0		if (wantarray) {
2262	0						my @glob = _DOS_like_glob();
2263	0						for my $glob (@glob) {
2264	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2265							}
2266	0						return @glob;
2267							}
2268							else {
2269	0						my $glob = _DOS_like_glob();
2270	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2271	0						return $glob;
2272							}
2273							}
2274
2275							#
2276							# Latin-1 path globbing via File::DosGlob 1.10
2277							#
2278							# Often I confuse "_dosglob" and "_doglob".
2279							# So, I renamed "_dosglob" to "_DOS_like_glob".
2280							#
2281							my %iter;
2282							my %entries;
2283							sub _DOS_like_glob {
2284
2285							# context (keyed by second cxix argument provided by core)
2286	0			0			my($expr,$cxix) = @_;
2287
2288							# glob without args defaults to $_
2289	0	0					$expr = $_ if not defined $expr;
2290
2291							# represents the current user's home directory
2292							#
2293							# 7.3. Expanding Tildes in Filenames
2294							# in Chapter 7. File Access
2295							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2296							#
2297							# and File::HomeDir, File::HomeDir::Windows module
2298
2299							# DOS-like system
2300	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2301	0						$expr =~ s{ \A ~ (?= [^/\\] ) }
2302	0						{ my_home_MSWin32() }oxmse;
2303							}
2304
2305							# UNIX-like system
2306							else {
2307	0						$expr =~ s{ \A ~ ( (?:[^/])* ) }
2308	0	0	0				{ $1 ? (eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
2309							}
2310
2311							# assume global context if not provided one
2312	0	0					$cxix = '_G_' if not defined $cxix;
2313	0	0					$iter{$cxix} = 0 if not exists $iter{$cxix};
2314
2315							# if we're just beginning, do it all first
2316	0	0					if ($iter{$cxix} == 0) {
2317	0						$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
2318							}
2319
2320							# chuck it all out, quick or slow
2321	0	0					if (wantarray) {
2322	0						delete $iter{$cxix};
2323	0						return @{delete $entries{$cxix}};
	0
2324							}
2325							else {
2326	0	0					if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0
2327	0						return shift @{$entries{$cxix}};
	0
2328							}
2329							else {
2330							# return undef for EOL
2331	0						delete $iter{$cxix};
2332	0						delete $entries{$cxix};
2333	0						return undef;
2334							}
2335							}
2336							}
2337
2338							#
2339							# Latin-1 path globbing subroutine
2340							#
2341							sub _do_glob {
2342
2343	0			0			my($cond,@expr) = @_;
2344	0						my @glob = ();
2345	0						my $fix_drive_relative_paths = 0;
2346
2347							OUTER:
2348	0						for my $expr (@expr) {
2349	0	0					next OUTER if not defined $expr;
2350	0	0					next OUTER if $expr eq '';
2351
2352	0						my @matched = ();
2353	0						my @globdir = ();
2354	0						my $head = '.';
2355	0						my $pathsep = '/';
2356	0						my $tail;
2357
2358							# if argument is within quotes strip em and do no globbing
2359	0	0					if ($expr =~ /\A " ((?:$q_char)*) " \z/oxms) {
2360	0						$expr = $1;
2361	0	0					if ($cond eq 'd') {
2362	0	0					if (-d $expr) {
2363	0						push @glob, $expr;
2364							}
2365							}
2366							else {
2367	0	0					if (-e $expr) {
2368	0						push @glob, $expr;
2369							}
2370							}
2371	0						next OUTER;
2372							}
2373
2374							# wildcards with a drive prefix such as h:*.pm must be changed
2375							# to h:./*.pm to expand correctly
2376	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2377	0	0					if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
2378	0						$fix_drive_relative_paths = 1;
2379							}
2380							}
2381
2382	0	0					if (($head, $tail) = _parse_path($expr,$pathsep)) {
2383	0	0					if ($tail eq '') {
2384	0						push @glob, $expr;
2385	0						next OUTER;
2386							}
2387	0	0					if ($head =~ / \A (?:$q_char)? [?] /oxms) {
2388	0	0					if (@globdir = _do_glob('d', $head)) {
2389	0						push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0
2390	0						next OUTER;
2391							}
2392							}
2393	0	0	0				if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
2394	0						$head .= $pathsep;
2395							}
2396	0						$expr = $tail;
2397							}
2398
2399							# If file component has no wildcards, we can avoid opendir
2400	0	0					if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
2401	0	0					if ($head eq '.') {
2402	0						$head = '';
2403							}
2404	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2405	0						$head .= $pathsep;
2406							}
2407	0						$head .= $expr;
2408	0	0					if ($cond eq 'd') {
2409	0	0					if (-d $head) {
2410	0						push @glob, $head;
2411							}
2412							}
2413							else {
2414	0	0					if (-e $head) {
2415	0						push @glob, $head;
2416							}
2417							}
2418	0						next OUTER;
2419							}
2420	0	0					opendir(*DIR, $head) or next OUTER;
2421	0						my @leaf = readdir DIR;
2422	0						closedir DIR;
2423
2424	0	0					if ($head eq '.') {
2425	0						$head = '';
2426							}
2427	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2428	0						$head .= $pathsep;
2429							}
2430
2431	0						my $pattern = '';
2432	0						while ($expr =~ / \G ($q_char) /oxgc) {
2433	0						my $char = $1;
2434
2435							# 6.9. Matching Shell Globs as Regular Expressions
2436							# in Chapter 6. Pattern Matching
2437							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2438							# (and so on)
2439
2440	0	0					if ($char eq '*') {
		0
		0
2441	0						$pattern .= "(?:$your_char)*",
2442							}
2443							elsif ($char eq '?') {
2444	0						$pattern .= "(?:$your_char)?", # DOS style
2445							# $pattern .= "(?:$your_char)", # UNIX style
2446							}
2447							elsif ((my $fc = Char::Elatin1::fc($char)) ne $char) {
2448	0						$pattern .= $fc;
2449							}
2450							else {
2451	0						$pattern .= quotemeta $char;
2452							}
2453							}
2454	0			0			my $matchsub = sub { Char::Elatin1::fc($_[0]) =~ /\A $pattern \z/xms };
	0
2455
2456							# if ($@) {
2457							# print STDERR "$0: $@\n";
2458							# next OUTER;
2459							# }
2460
2461							INNER:
2462	0						for my $leaf (@leaf) {
2463	0	0	0				if ($leaf eq '.' or $leaf eq '..') {
2464	0						next INNER;
2465							}
2466	0	0	0				if ($cond eq 'd' and not -d "$head$leaf") {
2467	0						next INNER;
2468							}
2469
2470	0	0					if (&$matchsub($leaf)) {
2471	0						push @matched, "$head$leaf";
2472	0						next INNER;
2473							}
2474
2475							# [DOS compatibility special case]
2476							# Failed, add a trailing dot and try again, but only...
2477
2478	0	0	0				if (Char::Elatin1::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
2479							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
2480							Char::Elatin1::index($pattern,'\\.') != -1 # pattern has a dot.
2481							) {
2482	0	0					if (&$matchsub("$leaf.")) {
2483	0						push @matched, "$head$leaf";
2484	0						next INNER;
2485							}
2486							}
2487							}
2488	0	0					if (@matched) {
2489	0						push @glob, @matched;
2490							}
2491							}
2492	0	0					if ($fix_drive_relative_paths) {
2493	0						for my $glob (@glob) {
2494	0						$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
2495							}
2496							}
2497	0						return @glob;
2498							}
2499
2500							#
2501							# Latin-1 parse line
2502							#
2503							sub _parse_line {
2504
2505	0			0			my($line) = @_;
2506
2507	0						$line .= ' ';
2508	0						my @piece = ();
2509	0						while ($line =~ /
2510							" ( (?: [^"] )* ) " \s+ \|
2511							( (?: [^"\s] )* ) \s+
2512							/oxmsg
2513							) {
2514	0	0					push @piece, defined($1) ? $1 : $2;
2515							}
2516	0						return @piece;
2517							}
2518
2519							#
2520							# Latin-1 parse path
2521							#
2522							sub _parse_path {
2523
2524	0			0			my($path,$pathsep) = @_;
2525
2526	0						$path .= '/';
2527	0						my @subpath = ();
2528	0						while ($path =~ /
2529							((?: [^\/\\] )+?) [\/\\]
2530							/oxmsg
2531							) {
2532	0						push @subpath, $1;
2533							}
2534
2535	0						my $tail = pop @subpath;
2536	0						my $head = join $pathsep, @subpath;
2537	0						return $head, $tail;
2538							}
2539
2540							#
2541							# via File::HomeDir::Windows 1.00
2542							#
2543							sub my_home_MSWin32 {
2544
2545							# A lot of unix people and unix-derived tools rely on
2546							# the ability to overload HOME. We will support it too
2547							# so that they can replace raw HOME calls with File::HomeDir.
2548	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
2549	0						return $ENV{'HOME'};
2550							}
2551
2552							# Do we have a user profile?
2553							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
2554	0						return $ENV{'USERPROFILE'};
2555							}
2556
2557							# Some Windows use something like $ENV{'HOME'}
2558							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
2559	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
2560							}
2561
2562	0						return undef;
2563							}
2564
2565							#
2566							# via File::HomeDir::Unix 1.00
2567							#
2568							sub my_home {
2569	0			0	0		my $home;
2570
2571	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
2572	0						$home = $ENV{'HOME'};
2573							}
2574
2575							# This is from the original code, but I'm guessing
2576							# it means "login directory" and exists on some Unixes.
2577							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
2578	0						$home = $ENV{'LOGDIR'};
2579							}
2580
2581							### More-desperate methods
2582
2583							# Light desperation on any (Unixish) platform
2584							else {
2585	0						$home = eval q{ (getpwuid($<))[7] };
2586							}
2587
2588							# On Unix in general, a non-existant home means "no home"
2589							# For example, "nobody"-like users might use /nonexistant
2590	0	0	0				if (defined $home and ! -d($home)) {
2591	0						$home = undef;
2592							}
2593	0						return $home;
2594							}
2595
2596							#
2597							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
2598							#
2599							sub Char::Elatin1::PREMATCH {
2600	0			0	0		return $`;
2601							}
2602
2603							#
2604							# ${^MATCH}, $MATCH, $& the string that matched
2605							#
2606							sub Char::Elatin1::MATCH {
2607	0			0	0		return $&;
2608							}
2609
2610							#
2611							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
2612							#
2613							sub Char::Elatin1::POSTMATCH {
2614	0			0	0		return $';
2615							}
2616
2617							#
2618							# Latin-1 character to order (with parameter)
2619							#
2620							sub Char::Latin1::ord(;$) {
2621
2622	0	0		0	1		local $_ = shift if @_;
2623
2624	0	0					if (/\A ($q_char) /oxms) {
2625	0						my @ord = unpack 'C*', $1;
2626	0						my $ord = 0;
2627	0						while (my $o = shift @ord) {
2628	0						$ord = $ord * 0x100 + $o;
2629							}
2630	0						return $ord;
2631							}
2632							else {
2633	0						return CORE::ord $_;
2634							}
2635							}
2636
2637							#
2638							# Latin-1 character to order (without parameter)
2639							#
2640							sub Char::Latin1::ord_() {
2641
2642	0	0		0	0		if (/\A ($q_char) /oxms) {
2643	0						my @ord = unpack 'C*', $1;
2644	0						my $ord = 0;
2645	0						while (my $o = shift @ord) {
2646	0						$ord = $ord * 0x100 + $o;
2647							}
2648	0						return $ord;
2649							}
2650							else {
2651	0						return CORE::ord $_;
2652							}
2653							}
2654
2655							#
2656							# Latin-1 reverse
2657							#
2658							sub Char::Latin1::reverse(@) {
2659
2660	0	0		0	0		if (wantarray) {
2661	0						return CORE::reverse @_;
2662							}
2663							else {
2664
2665							# One of us once cornered Larry in an elevator and asked him what
2666							# problem he was solving with this, but he looked as far off into
2667							# the distance as he could in an elevator and said, "It seemed like
2668							# a good idea at the time."
2669
2670	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
2671							}
2672							}
2673
2674							#
2675							# Latin-1 getc (with parameter, without parameter)
2676							#
2677							sub Char::Latin1::getc(;*@) {
2678
2679	0			0	0		my($package) = caller;
2680	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
2681	0	0	0				croak 'Too many arguments for Char::Latin1::getc' if @_ and not wantarray;
2682
2683	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
2684	0						my $getc = '';
2685	0						for my $length ($length[0] .. $length[-1]) {
2686	0						$getc .= CORE::getc($fh);
2687	0	0					if (exists $range_tr{CORE::length($getc)}) {
2688	0	0					if ($getc =~ /\A ${Char::Elatin1::dot_s} \z/oxms) {
2689	0	0					return wantarray ? ($getc,@_) : $getc;
2690							}
2691							}
2692							}
2693	0	0					return wantarray ? ($getc,@_) : $getc;
2694							}
2695
2696							#
2697							# Latin-1 length by character
2698							#
2699							sub Char::Latin1::length(;$) {
2700
2701	0	0		0	1		local $_ = shift if @_;
2702
2703	0						local @_ = /\G ($q_char) /oxmsg;
2704	0						return scalar @_;
2705							}
2706
2707							#
2708							# Latin-1 substr by character
2709							#
2710							BEGIN {
2711
2712							# P.232 The lvalue Attribute
2713							# in Chapter 6: Subroutines
2714							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2715
2716							# P.336 The lvalue Attribute
2717							# in Chapter 7: Subroutines
2718							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2719
2720							# P.144 8.4 Lvalue subroutines
2721							# in Chapter 8: perlsub: Perl subroutines
2722							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2723
2724	177	50	0	177	1	274260	eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
2725							# vv----------------*******
2726							sub Char::Latin1::substr($$;$$) %s {
2727
2728							my @char = $_[0] =~ /\G ($q_char) /oxmsg;
2729
2730							# If the substring is beyond either end of the string, substr() returns the undefined
2731							# value and produces a warning. When used as an lvalue, specifying a substring that
2732							# is entirely outside the string raises an exception.
2733							# http://perldoc.perl.org/functions/substr.html
2734
2735							# A return with no argument returns the scalar value undef in scalar context,
2736							# an empty list () in list context, and (naturally) nothing at all in void
2737							# context.
2738
2739							my $offset = $_[1];
2740							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
2741							return;
2742							}
2743
2744							# substr($string,$offset,$length,$replacement)
2745							if (@_ == 4) {
2746							my(undef,undef,$length,$replacement) = @_;
2747							my $substr = join '', splice(@char, $offset, $length, $replacement);
2748							$_[0] = join '', @char;
2749
2750							# return $substr; this doesn't work, don't say "return"
2751							$substr;
2752							}
2753
2754							# substr($string,$offset,$length)
2755							elsif (@_ == 3) {
2756							my(undef,undef,$length) = @_;
2757							my $octet_offset = 0;
2758							my $octet_length = 0;
2759							if ($offset == 0) {
2760							$octet_offset = 0;
2761							}
2762							elsif ($offset > 0) {
2763							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2764							}
2765							else {
2766							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2767							}
2768							if ($length == 0) {
2769							$octet_length = 0;
2770							}
2771							elsif ($length > 0) {
2772							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
2773							}
2774							else {
2775							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
2776							}
2777							CORE::substr($_[0], $octet_offset, $octet_length);
2778							}
2779
2780							# substr($string,$offset)
2781							else {
2782							my $octet_offset = 0;
2783							if ($offset == 0) {
2784							$octet_offset = 0;
2785							}
2786							elsif ($offset > 0) {
2787							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2788							}
2789							else {
2790							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2791							}
2792							CORE::substr($_[0], $octet_offset);
2793							}
2794							}
2795							END
2796							}
2797
2798							#
2799							# Latin-1 index by character
2800							#
2801							sub Char::Latin1::index($$;$) {
2802
2803	0			0	1		my $index;
2804	0	0					if (@_ == 3) {
2805	0						$index = Char::Elatin1::index($_[0], $_[1], CORE::length(Char::Latin1::substr($_[0], 0, $_[2])));
2806							}
2807							else {
2808	0						$index = Char::Elatin1::index($_[0], $_[1]);
2809							}
2810
2811	0	0					if ($index == -1) {
2812	0						return -1;
2813							}
2814							else {
2815	0						return Char::Latin1::length(CORE::substr $_[0], 0, $index);
2816							}
2817							}
2818
2819							#
2820							# Latin-1 rindex by character
2821							#
2822							sub Char::Latin1::rindex($$;$) {
2823
2824	0			0	1		my $rindex;
2825	0	0					if (@_ == 3) {
2826	0						$rindex = Char::Elatin1::rindex($_[0], $_[1], CORE::length(Char::Latin1::substr($_[0], 0, $_[2])));
2827							}
2828							else {
2829	0						$rindex = Char::Elatin1::rindex($_[0], $_[1]);
2830							}
2831
2832	0	0					if ($rindex == -1) {
2833	0						return -1;
2834							}
2835							else {
2836	0						return Char::Latin1::length(CORE::substr $_[0], 0, $rindex);
2837							}
2838							}
2839
2840							#
2841							# instead of Carp::carp
2842							#
2843							sub carp {
2844	0			0	0		my($package,$filename,$line) = caller(1);
2845	0						print STDERR "@_ at $filename line $line.\n";
2846							}
2847
2848							#
2849							# instead of Carp::croak
2850							#
2851							sub croak {
2852	0			0	0		my($package,$filename,$line) = caller(1);
2853	0						print STDERR "@_ at $filename line $line.\n";
2854	0						die "\n";
2855							}
2856
2857							#
2858							# instead of Carp::cluck
2859							#
2860							sub cluck {
2861	0			0	0		my $i = 0;
2862	0						my @cluck = ();
2863	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2864	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
2865	0						$i++;
2866							}
2867	0						print STDERR CORE::reverse @cluck;
2868	0						print STDERR "\n";
2869	0						carp @_;
2870							}
2871
2872							#
2873							# instead of Carp::confess
2874							#
2875							sub confess {
2876	0			0	0		my $i = 0;
2877	0						my @confess = ();
2878	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2879	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
2880	0						$i++;
2881							}
2882	0						print STDERR CORE::reverse @confess;
2883	0						print STDERR "\n";
2884	0						croak @_;
2885							}
2886
2887							1;
2888
2889							__END__