File Coverage

Char/Elatin9.pm

Criterion	Covered	Total	%
statement	51	954	5.3
branch	4	562	0.7
condition	1	180	0.5
subroutine	20	85	23.5
pod	7	50	14.0
total	83	1831	4.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# This file is * FOR CPAN USE ONLY *.
3							#
4							package Char::Elatin9;
5							######################################################################
6							#
7							# Char::Elatin9 - Run-time routines for Char/Latin9.pm
8							#
9							# http://search.cpan.org/dist/Char-Latin9/
10							#
11							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 INABA Hitoshi
12							######################################################################
13
14	177			177		4039	use 5.00503; # Galapagos Consensus 1998 for primetools
	177					880
	177					10180
15							# use 5.008001; # Lancaster Consensus 2013 for toolchains
16
17							# 12.3. Delaying use Until Runtime
18							# in Chapter 12. Packages, Libraries, and Modules
19							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
20							# (and so on)
21
22	177			177		14382	BEGIN { eval q{ use vars qw($VERSION) } }
	177			177		1261
	177					350
	177					48634
23							$VERSION = sprintf '%d.%02d', q$Revision: 1.00 $ =~ /(\d+)/xmsg;
24
25							BEGIN {
26	177	50		177		1340	if ($^X =~ / jperl /oxmsi) {
27	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)";
28							}
29	177					390	if (CORE::ord('A') == 193) {
30							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).";
31							}
32	177					33041	if (CORE::ord('A') != 0x41) {
33							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).";
34							}
35							}
36
37							BEGIN {
38
39							# instead of utf8.pm
40	177			177		14209	eval q{
	177			177		1176
	177			61		336
	177					35549
	61					12462
	55					11090
	73					18623
	56					11099
	51					11569
	58					11294
41							no warnings qw(redefine);
42							*utf8::upgrade = sub { CORE::length $_[0] };
43							*utf8::downgrade = sub { 1 };
44							*utf8::encode = sub { };
45							*utf8::decode = sub { 1 };
46							*utf8::is_utf8 = sub { };
47							*utf8::valid = sub { 1 };
48							};
49	177	50				179399	if ($@) {
50	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
51	0					0	*utf8::downgrade = sub { 1 };
	0					0
52	0					0	*utf8::encode = sub { };
	0					0
53	0					0	*utf8::decode = sub { 1 };
	0					0
54	0					0	*utf8::is_utf8 = sub { };
	0					0
55	0					0	*utf8::valid = sub { 1 };
	0					0
56							}
57							}
58
59							# instead of Symbol.pm
60							BEGIN {
61	177			177		457	my $genpkg = "Symbol::";
62	177					9877	my $genseq = 0;
63
64							sub gensym () {
65	0			0	0	0	my $name = "GEN" . $genseq++;
66
67							# here, no strict qw(refs); if strict.pm exists
68
69	0					0	my $ref = \*{$genpkg . $name};
	0					0
70	0					0	delete $$genpkg{$name};
71	0					0	return $ref;
72							}
73
74							sub qualify ($;$) {
75	0			0	0	0	my ($name) = @_;
76	0	0	0			0	if (!ref($name) && (Char::Elatin9::index($name, '::') == -1) && (Char::Elatin9::index($name, "'") == -1)) {
			0
77	0					0	my $pkg;
78	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
79
80							# Global names: special character, "^xyz", or other.
81	0	0	0			0	if ($name =~ /^(([^a-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
82							# RGS 2001-11-05 : translate leading ^X to control-char
83	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
84	0					0	$pkg = "main";
85							}
86							else {
87	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
88							}
89	0					0	$name = $pkg . "::" . $name;
90							}
91	0					0	return $name;
92							}
93
94							sub qualify_to_ref ($;$) {
95
96							# here, no strict qw(refs); if strict.pm exists
97
98	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
99							}
100							}
101
102							# Column: local $@
103							# in Chapter 9. Osaete okitai Perl no kiso
104							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
105							# (and so on)
106
107							# use strict; if strict.pm exists
108							BEGIN {
109	177	50		177		408	if (eval { local $@; CORE::require strict }) {
	177					465
	177					2118
110	177					31287	strict::->import;
111							}
112							}
113
114							# P.714 29.2.39. flock
115							# in Chapter 29: Functions
116							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
117
118							# P.863 flock
119							# in Chapter 27: Functions
120							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
121
122							sub LOCK_SH() {1}
123							sub LOCK_EX() {2}
124							sub LOCK_UN() {8}
125							sub LOCK_NB() {4}
126
127							# instead of Carp.pm
128							sub carp;
129							sub croak;
130							sub cluck;
131							sub confess;
132
133							my $your_char = q{[\x00-\xFF]};
134
135							# regexp of character
136	177			177		19945	BEGIN { eval q{ use vars qw($q_char) } }
	177			177		1948
	177					371
	177					20143
137							$q_char = qr/$your_char/oxms;
138
139							#
140							# Latin-9 character range per length
141							#
142							my %range_tr = ();
143
144							#
145							# alias of encoding name
146							#
147	177			177		16019	BEGIN { eval q{ use vars qw($encoding_alias) } }
	177			177		1270
	177					390
	177					516095
148
149							#
150							# Latin-9 case conversion
151							#
152							my %lc = ();
153							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
154							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
155							my %uc = ();
156							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
157							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
158							my %fc = ();
159							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
160							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
161
162							if (0) {
163							}
164
165							elsif (__PACKAGE__ =~ / \b Elatin9 \z/oxms) {
166							%range_tr = (
167							1 => [ [0x00..0xFF],
168							],
169							);
170							$encoding_alias = qr/ \b (?: iso[-_ ]?8859-15 \| iec[- ]?8859-15 \| latin-?9 ) \b /oxmsi;
171
172							%lc = (%lc,
173							"\xA6" => "\xA8", # LATIN LETTER S WITH CARON
174							"\xB4" => "\xB8", # LATIN LETTER Z WITH CARON
175							"\xBC" => "\xBD", # LATIN LIGATURE OE
176							"\xBE" => "\xFF", # LATIN LETTER Y WITH DIAERESIS
177							"\xC0" => "\xE0", # LATIN LETTER A WITH GRAVE
178							"\xC1" => "\xE1", # LATIN LETTER A WITH ACUTE
179							"\xC2" => "\xE2", # LATIN LETTER A WITH CIRCUMFLEX
180							"\xC3" => "\xE3", # LATIN LETTER A WITH TILDE
181							"\xC4" => "\xE4", # LATIN LETTER A WITH DIAERESIS
182							"\xC5" => "\xE5", # LATIN LETTER A WITH RING ABOVE
183							"\xC6" => "\xE6", # LATIN LETTER AE
184							"\xC7" => "\xE7", # LATIN LETTER C WITH CEDILLA
185							"\xC8" => "\xE8", # LATIN LETTER E WITH GRAVE
186							"\xC9" => "\xE9", # LATIN LETTER E WITH ACUTE
187							"\xCA" => "\xEA", # LATIN LETTER E WITH CIRCUMFLEX
188							"\xCB" => "\xEB", # LATIN LETTER E WITH DIAERESIS
189							"\xCC" => "\xEC", # LATIN LETTER I WITH GRAVE
190							"\xCD" => "\xED", # LATIN LETTER I WITH ACUTE
191							"\xCE" => "\xEE", # LATIN LETTER I WITH CIRCUMFLEX
192							"\xCF" => "\xEF", # LATIN LETTER I WITH DIAERESIS
193							"\xD0" => "\xF0", # LATIN LETTER ETH
194							"\xD1" => "\xF1", # LATIN LETTER N WITH TILDE
195							"\xD2" => "\xF2", # LATIN LETTER O WITH GRAVE
196							"\xD3" => "\xF3", # LATIN LETTER O WITH ACUTE
197							"\xD4" => "\xF4", # LATIN LETTER O WITH CIRCUMFLEX
198							"\xD5" => "\xF5", # LATIN LETTER O WITH TILDE
199							"\xD6" => "\xF6", # LATIN LETTER O WITH DIAERESIS
200							"\xD8" => "\xF8", # LATIN LETTER O WITH STROKE
201							"\xD9" => "\xF9", # LATIN LETTER U WITH GRAVE
202							"\xDA" => "\xFA", # LATIN LETTER U WITH ACUTE
203							"\xDB" => "\xFB", # LATIN LETTER U WITH CIRCUMFLEX
204							"\xDC" => "\xFC", # LATIN LETTER U WITH DIAERESIS
205							"\xDD" => "\xFD", # LATIN LETTER Y WITH ACUTE
206							"\xDE" => "\xFE", # LATIN LETTER THORN
207							);
208
209							%uc = (%uc,
210							"\xA8" => "\xA6", # LATIN LETTER S WITH CARON
211							"\xB8" => "\xB4", # LATIN LETTER Z WITH CARON
212							"\xBD" => "\xBC", # LATIN LIGATURE OE
213							"\xE0" => "\xC0", # LATIN LETTER A WITH GRAVE
214							"\xE1" => "\xC1", # LATIN LETTER A WITH ACUTE
215							"\xE2" => "\xC2", # LATIN LETTER A WITH CIRCUMFLEX
216							"\xE3" => "\xC3", # LATIN LETTER A WITH TILDE
217							"\xE4" => "\xC4", # LATIN LETTER A WITH DIAERESIS
218							"\xE5" => "\xC5", # LATIN LETTER A WITH RING ABOVE
219							"\xE6" => "\xC6", # LATIN LETTER AE
220							"\xE7" => "\xC7", # LATIN LETTER C WITH CEDILLA
221							"\xE8" => "\xC8", # LATIN LETTER E WITH GRAVE
222							"\xE9" => "\xC9", # LATIN LETTER E WITH ACUTE
223							"\xEA" => "\xCA", # LATIN LETTER E WITH CIRCUMFLEX
224							"\xEB" => "\xCB", # LATIN LETTER E WITH DIAERESIS
225							"\xEC" => "\xCC", # LATIN LETTER I WITH GRAVE
226							"\xED" => "\xCD", # LATIN LETTER I WITH ACUTE
227							"\xEE" => "\xCE", # LATIN LETTER I WITH CIRCUMFLEX
228							"\xEF" => "\xCF", # LATIN LETTER I WITH DIAERESIS
229							"\xF0" => "\xD0", # LATIN LETTER ETH
230							"\xF1" => "\xD1", # LATIN LETTER N WITH TILDE
231							"\xF2" => "\xD2", # LATIN LETTER O WITH GRAVE
232							"\xF3" => "\xD3", # LATIN LETTER O WITH ACUTE
233							"\xF4" => "\xD4", # LATIN LETTER O WITH CIRCUMFLEX
234							"\xF5" => "\xD5", # LATIN LETTER O WITH TILDE
235							"\xF6" => "\xD6", # LATIN LETTER O WITH DIAERESIS
236							"\xF8" => "\xD8", # LATIN LETTER O WITH STROKE
237							"\xF9" => "\xD9", # LATIN LETTER U WITH GRAVE
238							"\xFA" => "\xDA", # LATIN LETTER U WITH ACUTE
239							"\xFB" => "\xDB", # LATIN LETTER U WITH CIRCUMFLEX
240							"\xFC" => "\xDC", # LATIN LETTER U WITH DIAERESIS
241							"\xFD" => "\xDD", # LATIN LETTER Y WITH ACUTE
242							"\xFE" => "\xDE", # LATIN LETTER THORN
243							"\xFF" => "\xBE", # LATIN LETTER Y WITH DIAERESIS
244							);
245
246							%fc = (%fc,
247							"\xA6" => "\xA8", # LATIN CAPITAL LETTER S WITH CARON --> LATIN SMALL LETTER S WITH CARON
248							"\xB4" => "\xB8", # LATIN CAPITAL LETTER Z WITH CARON --> LATIN SMALL LETTER Z WITH CARON
249							"\xBC" => "\xBD", # LATIN CAPITAL LIGATURE OE --> LATIN SMALL LIGATURE OE
250							"\xBE" => "\xFF", # LATIN CAPITAL LETTER Y WITH DIAERESIS --> LATIN SMALL LETTER Y WITH DIAERESIS
251							"\xC0" => "\xE0", # LATIN CAPITAL LETTER A WITH GRAVE --> LATIN SMALL LETTER A WITH GRAVE
252							"\xC1" => "\xE1", # LATIN CAPITAL LETTER A WITH ACUTE --> LATIN SMALL LETTER A WITH ACUTE
253							"\xC2" => "\xE2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX --> LATIN SMALL LETTER A WITH CIRCUMFLEX
254							"\xC3" => "\xE3", # LATIN CAPITAL LETTER A WITH TILDE --> LATIN SMALL LETTER A WITH TILDE
255							"\xC4" => "\xE4", # LATIN CAPITAL LETTER A WITH DIAERESIS --> LATIN SMALL LETTER A WITH DIAERESIS
256							"\xC5" => "\xE5", # LATIN CAPITAL LETTER A WITH RING ABOVE --> LATIN SMALL LETTER A WITH RING ABOVE
257							"\xC6" => "\xE6", # LATIN CAPITAL LETTER AE --> LATIN SMALL LETTER AE
258							"\xC7" => "\xE7", # LATIN CAPITAL LETTER C WITH CEDILLA --> LATIN SMALL LETTER C WITH CEDILLA
259							"\xC8" => "\xE8", # LATIN CAPITAL LETTER E WITH GRAVE --> LATIN SMALL LETTER E WITH GRAVE
260							"\xC9" => "\xE9", # LATIN CAPITAL LETTER E WITH ACUTE --> LATIN SMALL LETTER E WITH ACUTE
261							"\xCA" => "\xEA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX --> LATIN SMALL LETTER E WITH CIRCUMFLEX
262							"\xCB" => "\xEB", # LATIN CAPITAL LETTER E WITH DIAERESIS --> LATIN SMALL LETTER E WITH DIAERESIS
263							"\xCC" => "\xEC", # LATIN CAPITAL LETTER I WITH GRAVE --> LATIN SMALL LETTER I WITH GRAVE
264							"\xCD" => "\xED", # LATIN CAPITAL LETTER I WITH ACUTE --> LATIN SMALL LETTER I WITH ACUTE
265							"\xCE" => "\xEE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX --> LATIN SMALL LETTER I WITH CIRCUMFLEX
266							"\xCF" => "\xEF", # LATIN CAPITAL LETTER I WITH DIAERESIS --> LATIN SMALL LETTER I WITH DIAERESIS
267							"\xD0" => "\xF0", # LATIN CAPITAL LETTER ETH --> LATIN SMALL LETTER ETH
268							"\xD1" => "\xF1", # LATIN CAPITAL LETTER N WITH TILDE --> LATIN SMALL LETTER N WITH TILDE
269							"\xD2" => "\xF2", # LATIN CAPITAL LETTER O WITH GRAVE --> LATIN SMALL LETTER O WITH GRAVE
270							"\xD3" => "\xF3", # LATIN CAPITAL LETTER O WITH ACUTE --> LATIN SMALL LETTER O WITH ACUTE
271							"\xD4" => "\xF4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX --> LATIN SMALL LETTER O WITH CIRCUMFLEX
272							"\xD5" => "\xF5", # LATIN CAPITAL LETTER O WITH TILDE --> LATIN SMALL LETTER O WITH TILDE
273							"\xD6" => "\xF6", # LATIN CAPITAL LETTER O WITH DIAERESIS --> LATIN SMALL LETTER O WITH DIAERESIS
274							"\xD8" => "\xF8", # LATIN CAPITAL LETTER O WITH STROKE --> LATIN SMALL LETTER O WITH STROKE
275							"\xD9" => "\xF9", # LATIN CAPITAL LETTER U WITH GRAVE --> LATIN SMALL LETTER U WITH GRAVE
276							"\xDA" => "\xFA", # LATIN CAPITAL LETTER U WITH ACUTE --> LATIN SMALL LETTER U WITH ACUTE
277							"\xDB" => "\xFB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX --> LATIN SMALL LETTER U WITH CIRCUMFLEX
278							"\xDC" => "\xFC", # LATIN CAPITAL LETTER U WITH DIAERESIS --> LATIN SMALL LETTER U WITH DIAERESIS
279							"\xDD" => "\xFD", # LATIN CAPITAL LETTER Y WITH ACUTE --> LATIN SMALL LETTER Y WITH ACUTE
280							"\xDE" => "\xFE", # LATIN CAPITAL LETTER THORN --> LATIN SMALL LETTER THORN
281							"\xDF" => "\x73\x73", # LATIN SMALL LETTER SHARP S --> LATIN SMALL LETTER S, LATIN SMALL LETTER S
282							);
283							}
284
285							else {
286							croak "Don't know my package name '@{[__PACKAGE__]}'";
287							}
288
289							#
290							# @ARGV wildcard globbing
291							#
292							sub import {
293
294	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
295	0					0	my @argv = ();
296	0					0	for (@ARGV) {
297
298							# has space
299	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
300	0	0				0	if (my @glob = Char::Elatin9::glob(qq{"$_"})) {
301	0					0	push @argv, @glob;
302							}
303							else {
304	0					0	push @argv, $_;
305							}
306							}
307
308							# has wildcard metachar
309							elsif (/\A (?:$q_char)? [?] /oxms) {
310	0	0				0	if (my @glob = Char::Elatin9::glob($_)) {
311	0					0	push @argv, @glob;
312							}
313							else {
314	0					0	push @argv, $_;
315							}
316							}
317
318							# no wildcard globbing
319							else {
320	0					0	push @argv, $_;
321							}
322							}
323	0					0	@ARGV = @argv;
324							}
325							}
326
327							# P.230 Care with Prototypes
328							# in Chapter 6: Subroutines
329							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
330							#
331							# If you aren't careful, you can get yourself into trouble with prototypes.
332							# But if you are careful, you can do a lot of neat things with them. This is
333							# all very powerful, of course, and should only be used in moderation to make
334							# the world a better place.
335
336							# P.332 Care with Prototypes
337							# in Chapter 7: Subroutines
338							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
339							#
340							# If you aren't careful, you can get yourself into trouble with prototypes.
341							# But if you are careful, you can do a lot of neat things with them. This is
342							# all very powerful, of course, and should only be used in moderation to make
343							# the world a better place.
344
345							#
346							# Prototypes of subroutines
347							#
348	0			0		0	sub unimport {}
349							sub Char::Elatin9::split(;$$$);
350							sub Char::Elatin9::tr($$$$;$);
351							sub Char::Elatin9::chop(@);
352							sub Char::Elatin9::index($$;$);
353							sub Char::Elatin9::rindex($$;$);
354							sub Char::Elatin9::lcfirst(@);
355							sub Char::Elatin9::lcfirst_();
356							sub Char::Elatin9::lc(@);
357							sub Char::Elatin9::lc_();
358							sub Char::Elatin9::ucfirst(@);
359							sub Char::Elatin9::ucfirst_();
360							sub Char::Elatin9::uc(@);
361							sub Char::Elatin9::uc_();
362							sub Char::Elatin9::fc(@);
363							sub Char::Elatin9::fc_();
364							sub Char::Elatin9::ignorecase;
365							sub Char::Elatin9::classic_character_class;
366							sub Char::Elatin9::capture;
367							sub Char::Elatin9::chr(;$);
368							sub Char::Elatin9::chr_();
369							sub Char::Elatin9::glob($);
370							sub Char::Elatin9::glob_();
371
372							sub Char::Latin9::ord(;$);
373							sub Char::Latin9::ord_();
374							sub Char::Latin9::reverse(@);
375							sub Char::Latin9::getc(;*@);
376							sub Char::Latin9::length(;$);
377							sub Char::Latin9::substr($$;$$);
378							sub Char::Latin9::index($$;$);
379							sub Char::Latin9::rindex($$;$);
380
381							#
382							# Regexp work
383							#
384	177			177		24251	BEGIN { eval q{ use vars qw(
	177			177		1505
	177					428
	177					106406
385							$Char::Latin9::re_a
386							$Char::Latin9::re_t
387							$Char::Latin9::re_n
388							$Char::Latin9::re_r
389							) } }
390
391							#
392							# Character class
393							#
394	177			177		16274	BEGIN { eval q{ use vars qw(
	177			177		1174
	177					552
	177					4025688
395							$dot
396							$dot_s
397							$eD
398							$eS
399							$eW
400							$eH
401							$eV
402							$eR
403							$eN
404							$not_alnum
405							$not_alpha
406							$not_ascii
407							$not_blank
408							$not_cntrl
409							$not_digit
410							$not_graph
411							$not_lower
412							$not_lower_i
413							$not_print
414							$not_punct
415							$not_space
416							$not_upper
417							$not_upper_i
418							$not_word
419							$not_xdigit
420							$eb
421							$eB
422							) } }
423
424							${Char::Elatin9::dot} = qr{(?:[^\x0A])};
425							${Char::Elatin9::dot_s} = qr{(?:[\x00-\xFF])};
426							${Char::Elatin9::eD} = qr{(?:[^0-9])};
427
428							# Vertical tabs are now whitespace
429							# \s in a regex now matches a vertical tab in all circumstances.
430							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
431							# ${Char::Elatin9::eS} = qr{(?:[^\x09\x0A \x0C\x0D\x20])};
432							# ${Char::Elatin9::eS} = qr{(?:[^\x09\x0A\x0B\x0C\x0D\x20])};
433							${Char::Elatin9::eS} = qr{(?:[^\s])};
434
435							${Char::Elatin9::eW} = qr{(?:[^0-9A-Z_a-z])};
436							${Char::Elatin9::eH} = qr{(?:[^\x09\x20])};
437							${Char::Elatin9::eV} = qr{(?:[^\x0A\x0B\x0C\x0D])};
438							${Char::Elatin9::eR} = qr{(?:\x0D\x0A\|[\x0A\x0D])};
439							${Char::Elatin9::eN} = qr{(?:[^\x0A])};
440							${Char::Elatin9::not_alnum} = qr{(?:[^\x30-\x39\x41-\x5A\x61-\x7A])};
441							${Char::Elatin9::not_alpha} = qr{(?:[^\x41-\x5A\x61-\x7A])};
442							${Char::Elatin9::not_ascii} = qr{(?:[^\x00-\x7F])};
443							${Char::Elatin9::not_blank} = qr{(?:[^\x09\x20])};
444							${Char::Elatin9::not_cntrl} = qr{(?:[^\x00-\x1F\x7F])};
445							${Char::Elatin9::not_digit} = qr{(?:[^\x30-\x39])};
446							${Char::Elatin9::not_graph} = qr{(?:[^\x21-\x7F])};
447							${Char::Elatin9::not_lower} = qr{(?:[^\x61-\x7A])};
448							${Char::Elatin9::not_lower_i} = qr{(?:[\x00-\xFF])};
449							${Char::Elatin9::not_print} = qr{(?:[^\x20-\x7F])};
450							${Char::Elatin9::not_punct} = qr{(?:[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
451							${Char::Elatin9::not_space} = qr{(?:[^\s\x0B])};
452							${Char::Elatin9::not_upper} = qr{(?:[^\x41-\x5A])};
453							${Char::Elatin9::not_upper_i} = qr{(?:[\x00-\xFF])};
454							${Char::Elatin9::not_word} = qr{(?:[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
455							${Char::Elatin9::not_xdigit} = qr{(?:[^\x30-\x39\x41-\x46\x61-\x66])};
456							${Char::Elatin9::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
457							${Char::Elatin9::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
458
459							# avoid: Name "Char::Elatin9::foo" used only once: possible typo at here.
460							${Char::Elatin9::dot} = ${Char::Elatin9::dot};
461							${Char::Elatin9::dot_s} = ${Char::Elatin9::dot_s};
462							${Char::Elatin9::eD} = ${Char::Elatin9::eD};
463							${Char::Elatin9::eS} = ${Char::Elatin9::eS};
464							${Char::Elatin9::eW} = ${Char::Elatin9::eW};
465							${Char::Elatin9::eH} = ${Char::Elatin9::eH};
466							${Char::Elatin9::eV} = ${Char::Elatin9::eV};
467							${Char::Elatin9::eR} = ${Char::Elatin9::eR};
468							${Char::Elatin9::eN} = ${Char::Elatin9::eN};
469							${Char::Elatin9::not_alnum} = ${Char::Elatin9::not_alnum};
470							${Char::Elatin9::not_alpha} = ${Char::Elatin9::not_alpha};
471							${Char::Elatin9::not_ascii} = ${Char::Elatin9::not_ascii};
472							${Char::Elatin9::not_blank} = ${Char::Elatin9::not_blank};
473							${Char::Elatin9::not_cntrl} = ${Char::Elatin9::not_cntrl};
474							${Char::Elatin9::not_digit} = ${Char::Elatin9::not_digit};
475							${Char::Elatin9::not_graph} = ${Char::Elatin9::not_graph};
476							${Char::Elatin9::not_lower} = ${Char::Elatin9::not_lower};
477							${Char::Elatin9::not_lower_i} = ${Char::Elatin9::not_lower_i};
478							${Char::Elatin9::not_print} = ${Char::Elatin9::not_print};
479							${Char::Elatin9::not_punct} = ${Char::Elatin9::not_punct};
480							${Char::Elatin9::not_space} = ${Char::Elatin9::not_space};
481							${Char::Elatin9::not_upper} = ${Char::Elatin9::not_upper};
482							${Char::Elatin9::not_upper_i} = ${Char::Elatin9::not_upper_i};
483							${Char::Elatin9::not_word} = ${Char::Elatin9::not_word};
484							${Char::Elatin9::not_xdigit} = ${Char::Elatin9::not_xdigit};
485							${Char::Elatin9::eb} = ${Char::Elatin9::eb};
486							${Char::Elatin9::eB} = ${Char::Elatin9::eB};
487
488							#
489							# Latin-9 split
490							#
491							sub Char::Elatin9::split(;$$$) {
492
493							# P.794 29.2.161. split
494							# in Chapter 29: Functions
495							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
496
497							# P.951 split
498							# in Chapter 27: Functions
499							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
500
501	0			0	0	0	my $pattern = $_[0];
502	0					0	my $string = $_[1];
503	0					0	my $limit = $_[2];
504
505							# if $pattern is also omitted or is the literal space, " "
506	0	0				0	if (not defined $pattern) {
507	0					0	$pattern = ' ';
508							}
509
510							# if $string is omitted, the function splits the $_ string
511	0	0				0	if (not defined $string) {
512	0	0				0	if (defined $_) {
513	0					0	$string = $_;
514							}
515							else {
516	0					0	$string = '';
517							}
518							}
519
520	0					0	my @split = ();
521
522							# when string is empty
523	0	0				0	if ($string eq '') {
		0
524
525							# resulting list value in list context
526	0	0				0	if (wantarray) {
527	0					0	return @split;
528							}
529
530							# count of substrings in scalar context
531							else {
532	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
533	0					0	@_ = @split;
534	0					0	return scalar @_;
535							}
536							}
537
538							# split's first argument is more consistently interpreted
539							#
540							# After some changes earlier in v5.17, split's behavior has been simplified:
541							# if the PATTERN argument evaluates to a string containing one space, it is
542							# treated the way that a literal string containing one space once was.
543							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
544
545							# if $pattern is also omitted or is the literal space, " ", the function splits
546							# on whitespace, /\s+/, after skipping any leading whitespace
547							# (and so on)
548
549							elsif ($pattern eq ' ') {
550	0	0				0	if (not defined $limit) {
551	0					0	return CORE::split(' ', $string);
552							}
553							else {
554	0					0	return CORE::split(' ', $string, $limit);
555							}
556							}
557
558							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
559	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
560
561							# a pattern capable of matching either the null string or something longer than the
562							# null string will split the value of $string into separate characters wherever it
563							# matches the null string between characters
564							# (and so on)
565
566	0	0				0	if ('' =~ / \A $pattern \z /xms) {
567	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
568	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
569
570							# P.1024 Appendix W.10 Multibyte Processing
571							# of ISBN 1-56592-224-7 CJKV Information Processing
572							# (and so on)
573
574							# the //m modifier is assumed when you split on the pattern /^/
575							# (and so on)
576
577							# V
578	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
579
580							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
581							# is included in the resulting list, interspersed with the fields that are ordinarily returned
582							# (and so on)
583
584	0					0	local $@;
585	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
586	0					0	push @split, eval('$' . $digit);
587							}
588							}
589							}
590
591							else {
592	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
593
594							# V
595	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
596	0					0	local $@;
597	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
598	0					0	push @split, eval('$' . $digit);
599							}
600							}
601							}
602							}
603
604							elsif ($limit > 0) {
605	0	0				0	if ('' =~ / \A $pattern \z /xms) {
606	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
607	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
608
609							# V
610	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
611	0					0	local $@;
612	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
613	0					0	push @split, eval('$' . $digit);
614							}
615							}
616							}
617							}
618							else {
619	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
620	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
621
622							# V
623	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
624	0					0	local $@;
625	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
626	0					0	push @split, eval('$' . $digit);
627							}
628							}
629							}
630							}
631							}
632
633	0	0				0	if (CORE::length($string) > 0) {
634	0					0	push @split, $string;
635							}
636
637							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
638	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
639	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
640	0					0	pop @split;
641							}
642							}
643
644							# resulting list value in list context
645	0	0				0	if (wantarray) {
646	0					0	return @split;
647							}
648
649							# count of substrings in scalar context
650							else {
651	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
652	0					0	@_ = @split;
653	0					0	return scalar @_;
654							}
655							}
656
657							#
658							# get last subexpression offsets
659							#
660							sub _last_subexpression_offsets {
661	0			0		0	my $pattern = $_[0];
662
663							# remove comment
664	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
665
666	0					0	my $modifier = '';
667	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
668	0					0	$modifier = $1;
669	0					0	$modifier =~ s/-[A-Za-z]*//;
670							}
671
672							# with /x modifier
673	0					0	my @char = ();
674	0	0				0	if ($modifier =~ /x/oxms) {
675	0					0	@char = $pattern =~ /\G(
676							\\ (?:$q_char) \|
677							\# (?:$q_char)*? $ \|
678							\[ (?: \\\] \| (?:$q_char))+? \] \|
679							\(\? \|
680							(?:$q_char)
681							)/oxmsg;
682							}
683
684							# without /x modifier
685							else {
686	0					0	@char = $pattern =~ /\G(
687							\\ (?:$q_char) \|
688							\[ (?: \\\] \| (?:$q_char))+? \] \|
689							\(\? \|
690							(?:$q_char)
691							)/oxmsg;
692							}
693
694	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
695							}
696
697							#
698							# Latin-9 transliteration (tr///)
699							#
700							sub Char::Elatin9::tr($$$$;$) {
701
702	0			0	0	0	my $bind_operator = $_[1];
703	0					0	my $searchlist = $_[2];
704	0					0	my $replacementlist = $_[3];
705	0		0			0	my $modifier = $_[4] \|\| '';
706
707	0	0				0	if ($modifier =~ /r/oxms) {
708	0	0				0	if ($bind_operator =~ / !~ /oxms) {
709	0					0	croak "Using !~ with tr///r doesn't make sense";
710							}
711							}
712
713	0					0	my @char = $_[0] =~ /\G ($q_char) /oxmsg;
714	0					0	my @searchlist = _charlist_tr($searchlist);
715	0					0	my @replacementlist = _charlist_tr($replacementlist);
716
717	0					0	my %tr = ();
718	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
719	0	0				0	if (not exists $tr{$searchlist[$i]}) {
720	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
721	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
722							}
723							elsif ($modifier =~ /d/oxms) {
724	0					0	$tr{$searchlist[$i]} = '';
725							}
726							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
727	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
728							}
729							else {
730	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
731							}
732							}
733							}
734
735	0					0	my $tr = 0;
736	0					0	my $replaced = '';
737	0	0				0	if ($modifier =~ /c/oxms) {
738	0					0	while (defined(my $char = shift @char)) {
739	0	0				0	if (not exists $tr{$char}) {
740	0	0				0	if (defined $replacementlist[0]) {
741	0					0	$replaced .= $replacementlist[0];
742							}
743	0					0	$tr++;
744	0	0				0	if ($modifier =~ /s/oxms) {
745	0		0			0	while (@char and (not exists $tr{$char[0]})) {
746	0					0	shift @char;
747	0					0	$tr++;
748							}
749							}
750							}
751							else {
752	0					0	$replaced .= $char;
753							}
754							}
755							}
756							else {
757	0					0	while (defined(my $char = shift @char)) {
758	0	0				0	if (exists $tr{$char}) {
759	0					0	$replaced .= $tr{$char};
760	0					0	$tr++;
761	0	0				0	if ($modifier =~ /s/oxms) {
762	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
763	0					0	shift @char;
764	0					0	$tr++;
765							}
766							}
767							}
768							else {
769	0					0	$replaced .= $char;
770							}
771							}
772							}
773
774	0	0				0	if ($modifier =~ /r/oxms) {
775	0					0	return $replaced;
776							}
777							else {
778	0					0	$_[0] = $replaced;
779	0	0				0	if ($bind_operator =~ / !~ /oxms) {
780	0					0	return not $tr;
781							}
782							else {
783	0					0	return $tr;
784							}
785							}
786							}
787
788							#
789							# Latin-9 chop
790							#
791							sub Char::Elatin9::chop(@) {
792
793	0			0	0	0	my $chop;
794	0	0				0	if (@_ == 0) {
795	0					0	my @char = /\G ($q_char) /oxmsg;
796	0					0	$chop = pop @char;
797	0					0	$_ = join '', @char;
798							}
799							else {
800	0					0	for (@_) {
801	0					0	my @char = /\G ($q_char) /oxmsg;
802	0					0	$chop = pop @char;
803	0					0	$_ = join '', @char;
804							}
805							}
806	0					0	return $chop;
807							}
808
809							#
810							# Latin-9 index by octet
811							#
812							sub Char::Elatin9::index($$;$) {
813
814	0			0	1	0	my($str,$substr,$position) = @_;
815	0		0			0	$position \|\|= 0;
816	0					0	my $pos = 0;
817
818	0					0	while ($pos < CORE::length($str)) {
819	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
820	0	0				0	if ($pos >= $position) {
821	0					0	return $pos;
822							}
823							}
824	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
825	0					0	$pos += CORE::length($1);
826							}
827							else {
828	0					0	$pos += 1;
829							}
830							}
831	0					0	return -1;
832							}
833
834							#
835							# Latin-9 reverse index
836							#
837							sub Char::Elatin9::rindex($$;$) {
838
839	0			0	0	0	my($str,$substr,$position) = @_;
840	0		0			0	$position \|\|= CORE::length($str) - 1;
841	0					0	my $pos = 0;
842	0					0	my $rindex = -1;
843
844	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
845	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
846	0					0	$rindex = $pos;
847							}
848	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
849	0					0	$pos += CORE::length($1);
850							}
851							else {
852	0					0	$pos += 1;
853							}
854							}
855	0					0	return $rindex;
856							}
857
858							#
859							# Latin-9 lower case first with parameter
860							#
861							sub Char::Elatin9::lcfirst(@) {
862	0	0		0	0	0	if (@_) {
863	0					0	my $s = shift @_;
864	0	0	0			0	if (@_ and wantarray) {
865	0					0	return Char::Elatin9::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
866							}
867							else {
868	0					0	return Char::Elatin9::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
869							}
870							}
871							else {
872	0					0	return Char::Elatin9::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
873							}
874							}
875
876							#
877							# Latin-9 lower case first without parameter
878							#
879							sub Char::Elatin9::lcfirst_() {
880	0			0	0	0	return Char::Elatin9::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
881							}
882
883							#
884							# Latin-9 lower case with parameter
885							#
886							sub Char::Elatin9::lc(@) {
887	0	0		0	0	0	if (@_) {
888	0					0	my $s = shift @_;
889	0	0	0			0	if (@_ and wantarray) {
890	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
891							}
892							else {
893	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
894							}
895							}
896							else {
897	0					0	return Char::Elatin9::lc_();
898							}
899							}
900
901							#
902							# Latin-9 lower case without parameter
903							#
904							sub Char::Elatin9::lc_() {
905	0			0	0	0	my $s = $_;
906	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
907							}
908
909							#
910							# Latin-9 upper case first with parameter
911							#
912							sub Char::Elatin9::ucfirst(@) {
913	0	0		0	0	0	if (@_) {
914	0					0	my $s = shift @_;
915	0	0	0			0	if (@_ and wantarray) {
916	0					0	return Char::Elatin9::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
917							}
918							else {
919	0					0	return Char::Elatin9::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
920							}
921							}
922							else {
923	0					0	return Char::Elatin9::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
924							}
925							}
926
927							#
928							# Latin-9 upper case first without parameter
929							#
930							sub Char::Elatin9::ucfirst_() {
931	0			0	0	0	return Char::Elatin9::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
932							}
933
934							#
935							# Latin-9 upper case with parameter
936							#
937							sub Char::Elatin9::uc(@) {
938	0	0		0	0	0	if (@_) {
939	0					0	my $s = shift @_;
940	0	0	0			0	if (@_ and wantarray) {
941	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
942							}
943							else {
944	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
945							}
946							}
947							else {
948	0					0	return Char::Elatin9::uc_();
949							}
950							}
951
952							#
953							# Latin-9 upper case without parameter
954							#
955							sub Char::Elatin9::uc_() {
956	0			0	0	0	my $s = $_;
957	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
958							}
959
960							#
961							# Latin-9 fold case with parameter
962							#
963							sub Char::Elatin9::fc(@) {
964	0	0		0	0	0	if (@_) {
965	0					0	my $s = shift @_;
966	0	0	0			0	if (@_ and wantarray) {
967	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
968							}
969							else {
970	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
971							}
972							}
973							else {
974	0					0	return Char::Elatin9::fc_();
975							}
976							}
977
978							#
979							# Latin-9 fold case without parameter
980							#
981							sub Char::Elatin9::fc_() {
982	0			0	0	0	my $s = $_;
983	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
984							}
985
986							#
987							# Latin-9 regexp capture
988							#
989							{
990							sub Char::Elatin9::capture {
991	0			0	1	0	return $_[0];
992							}
993							}
994
995							#
996							# Latin-9 regexp ignore case modifier
997							#
998							sub Char::Elatin9::ignorecase {
999
1000	0			0	0	0	my @string = @_;
1001	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
1002
1003							# ignore case of $scalar or @array
1004	0					0	for my $string (@string) {
1005
1006							# split regexp
1007	0					0	my @char = $string =~ /\G(
1008							\[\^ \|
1009							\\? (?:$q_char)
1010							)/oxmsg;
1011
1012							# unescape character
1013	0					0	for (my $i=0; $i <= $#char; $i++) {
1014	0	0				0	next if not defined $char[$i];
1015
1016							# open character class [...]
1017	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
1018	0					0	my $left = $i;
1019
1020							# [] make die "unmatched [] in regexp ..."
1021
1022	0	0				0	if ($char[$i+1] eq ']') {
1023	0					0	$i++;
1024							}
1025
1026	0					0	while (1) {
1027	0	0				0	if (++$i > $#char) {
1028	0					0	croak "Unmatched [] in regexp";
1029							}
1030	0	0				0	if ($char[$i] eq ']') {
1031	0					0	my $right = $i;
1032	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
1033
1034							# escape character
1035	0					0	for my $char (@charlist) {
1036	0	0				0	if (0) {
1037							}
1038
1039	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1040	0					0	$char = $1 . '\\' . $char;
1041							}
1042							}
1043
1044							# [...]
1045	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
1046
1047	0					0	$i = $left;
1048	0					0	last;
1049							}
1050							}
1051							}
1052
1053							# open character class [^...]
1054							elsif ($char[$i] eq '[^') {
1055	0					0	my $left = $i;
1056
1057							# [^] make die "unmatched [] in regexp ..."
1058
1059	0	0				0	if ($char[$i+1] eq ']') {
1060	0					0	$i++;
1061							}
1062
1063	0					0	while (1) {
1064	0	0				0	if (++$i > $#char) {
1065	0					0	croak "Unmatched [] in regexp";
1066							}
1067	0	0				0	if ($char[$i] eq ']') {
1068	0					0	my $right = $i;
1069	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
1070
1071							# escape character
1072	0					0	for my $char (@charlist) {
1073	0	0				0	if (0) {
1074							}
1075
1076	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1077	0					0	$char = '\\' . $char;
1078							}
1079							}
1080
1081							# [^...]
1082	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
1083
1084	0					0	$i = $left;
1085	0					0	last;
1086							}
1087							}
1088							}
1089
1090							# rewrite classic character class or escape character
1091							elsif (my $char = classic_character_class($char[$i])) {
1092	0					0	$char[$i] = $char;
1093							}
1094
1095							# with /i modifier
1096							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1097	0					0	my $uc = Char::Elatin9::uc($char[$i]);
1098	0					0	my $fc = Char::Elatin9::fc($char[$i]);
1099	0	0				0	if ($uc ne $fc) {
1100	0	0				0	if (CORE::length($fc) == 1) {
1101	0					0	$char[$i] = '[' . $uc . $fc . ']';
1102							}
1103							else {
1104	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
1105							}
1106							}
1107							}
1108							}
1109
1110							# characterize
1111	0					0	for (my $i=0; $i <= $#char; $i++) {
1112	0	0				0	next if not defined $char[$i];
1113
1114	0	0				0	if (0) {
1115							}
1116
1117							# quote character before ? + * {
1118	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
1119	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
1120	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
1121							}
1122							}
1123							}
1124
1125	0					0	$string = join '', @char;
1126							}
1127
1128							# make regexp string
1129	0					0	return @string;
1130							}
1131
1132							#
1133							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
1134							#
1135							sub Char::Elatin9::classic_character_class {
1136	0			0	0	0	my($char) = @_;
1137
1138							return {
1139	0		0			0	'\D' => '${Char::Elatin9::eD}',
1140							'\S' => '${Char::Elatin9::eS}',
1141							'\W' => '${Char::Elatin9::eW}',
1142							'\d' => '[0-9]',
1143
1144							# Before Perl 5.6, \s only matched the five whitespace characters
1145							# tab, newline, form-feed, carriage return, and the space character
1146							# itself, which, taken together, is the character class [\t\n\f\r ].
1147
1148							# Vertical tabs are now whitespace
1149							# \s in a regex now matches a vertical tab in all circumstances.
1150							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1151							# \t \n \v \f \r space
1152							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1153							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1154							'\s' => '\s',
1155
1156							'\w' => '[0-9A-Z_a-z]',
1157							'\C' => '[\x00-\xFF]',
1158							'\X' => 'X',
1159
1160							# \h \v \H \V
1161
1162							# P.114 Character Class Shortcuts
1163							# in Chapter 7: In the World of Regular Expressions
1164							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
1165
1166							# P.357 13.2.3 Whitespace
1167							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
1168							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1169							#
1170							# 0x00009 CHARACTER TABULATION h s
1171							# 0x0000a LINE FEED (LF) vs
1172							# 0x0000b LINE TABULATION v
1173							# 0x0000c FORM FEED (FF) vs
1174							# 0x0000d CARRIAGE RETURN (CR) vs
1175							# 0x00020 SPACE h s
1176
1177							# P.196 Table 5-9. Alphanumeric regex metasymbols
1178							# in Chapter 5. Pattern Matching
1179							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1180
1181							# (and so on)
1182
1183							'\H' => '${Char::Elatin9::eH}',
1184							'\V' => '${Char::Elatin9::eV}',
1185							'\h' => '[\x09\x20]',
1186							'\v' => '[\x0A\x0B\x0C\x0D]',
1187							'\R' => '${Char::Elatin9::eR}',
1188
1189							# \N
1190							#
1191							# http://perldoc.perl.org/perlre.html
1192							# Character Classes and other Special Escapes
1193							# Any character but \n (experimental). Not affected by /s modifier
1194
1195							'\N' => '${Char::Elatin9::eN}',
1196
1197							# \b \B
1198
1199							# P.180 Boundaries: The \b and \B Assertions
1200							# in Chapter 5: Pattern Matching
1201							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1202
1203							# P.219 Boundaries: The \b and \B Assertions
1204							# in Chapter 5: Pattern Matching
1205							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1206
1207							# '\b' => '(?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))',
1208							'\b' => '${Char::Elatin9::eb}',
1209
1210							# '\B' => '(?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))',
1211							'\B' => '${Char::Elatin9::eB}',
1212
1213							}->{$char} \|\| '';
1214							}
1215
1216							#
1217							# prepare Latin-9 characters per length
1218							#
1219
1220							# 1 octet characters
1221							my @chars1 = ();
1222							sub chars1 {
1223	0	0		0	0	0	if (@chars1) {
1224	0					0	return @chars1;
1225							}
1226	0	0				0	if (exists $range_tr{1}) {
1227	0					0	my @ranges = @{ $range_tr{1} };
	0					0
1228	0					0	while (my @range = splice(@ranges,0,1)) {
1229	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1230	0					0	push @chars1, pack 'C', $oct0;
1231							}
1232							}
1233							}
1234	0					0	return @chars1;
1235							}
1236
1237							# 2 octets characters
1238							my @chars2 = ();
1239							sub chars2 {
1240	0	0		0	0	0	if (@chars2) {
1241	0					0	return @chars2;
1242							}
1243	0	0				0	if (exists $range_tr{2}) {
1244	0					0	my @ranges = @{ $range_tr{2} };
	0					0
1245	0					0	while (my @range = splice(@ranges,0,2)) {
1246	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1247	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1248	0					0	push @chars2, pack 'CC', $oct0,$oct1;
1249							}
1250							}
1251							}
1252							}
1253	0					0	return @chars2;
1254							}
1255
1256							# 3 octets characters
1257							my @chars3 = ();
1258							sub chars3 {
1259	0	0		0	0	0	if (@chars3) {
1260	0					0	return @chars3;
1261							}
1262	0	0				0	if (exists $range_tr{3}) {
1263	0					0	my @ranges = @{ $range_tr{3} };
	0					0
1264	0					0	while (my @range = splice(@ranges,0,3)) {
1265	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1266	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1267	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1268	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
1269							}
1270							}
1271							}
1272							}
1273							}
1274	0					0	return @chars3;
1275							}
1276
1277							# 4 octets characters
1278							my @chars4 = ();
1279							sub chars4 {
1280	0	0		0	0	0	if (@chars4) {
1281	0					0	return @chars4;
1282							}
1283	0	0				0	if (exists $range_tr{4}) {
1284	0					0	my @ranges = @{ $range_tr{4} };
	0					0
1285	0					0	while (my @range = splice(@ranges,0,4)) {
1286	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1287	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1288	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1289	0					0	for my $oct3 (@{$range[3]}) {
	0					0
1290	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
1291							}
1292							}
1293							}
1294							}
1295							}
1296							}
1297	0					0	return @chars4;
1298							}
1299
1300							#
1301							# Latin-9 open character list for tr
1302							#
1303							sub _charlist_tr {
1304
1305	0			0		0	local $_ = shift @_;
1306
1307							# unescape character
1308	0					0	my @char = ();
1309	0					0	while (not /\G \z/oxmsgc) {
1310	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
1311	0					0	push @char, '\-';
1312							}
1313							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
1314	0					0	push @char, CORE::chr(oct $1);
1315							}
1316							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
1317	0					0	push @char, CORE::chr(hex $1);
1318							}
1319							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
1320	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
1321							}
1322							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
1323	0					0	push @char, {
1324							'\0' => "\0",
1325							'\n' => "\n",
1326							'\r' => "\r",
1327							'\t' => "\t",
1328							'\f' => "\f",
1329							'\b' => "\x08", # \b means backspace in character class
1330							'\a' => "\a",
1331							'\e' => "\e",
1332							}->{$1};
1333							}
1334							elsif (/\G \\ ($q_char) /oxmsgc) {
1335	0					0	push @char, $1;
1336							}
1337							elsif (/\G ($q_char) /oxmsgc) {
1338	0					0	push @char, $1;
1339							}
1340							}
1341
1342							# join separated multiple-octet
1343	0					0	@char = join('',@char) =~ /\G (\\-\|$q_char) /oxmsg;
1344
1345							# unescape '-'
1346	0					0	my @i = ();
1347	0					0	for my $i (0 .. $#char) {
1348	0	0				0	if ($char[$i] eq '\-') {
		0
1349	0					0	$char[$i] = '-';
1350							}
1351							elsif ($char[$i] eq '-') {
1352	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1353	0					0	push @i, $i;
1354							}
1355							}
1356							}
1357
1358							# open character list (reverse for splice)
1359	0					0	for my $i (CORE::reverse @i) {
1360	0					0	my @range = ();
1361
1362							# range error
1363	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
1364	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1365							}
1366
1367							# range of multiple-octet code
1368	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
1369	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
1370	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
1371							}
1372							elsif (CORE::length($char[$i+1]) == 2) {
1373	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1374	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
1375							}
1376							elsif (CORE::length($char[$i+1]) == 3) {
1377	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1378	0					0	push @range, chars2();
1379	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1380							}
1381							elsif (CORE::length($char[$i+1]) == 4) {
1382	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1383	0					0	push @range, chars2();
1384	0					0	push @range, chars3();
1385	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1386							}
1387							else {
1388	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1389							}
1390							}
1391							elsif (CORE::length($char[$i-1]) == 2) {
1392	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
1393	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
1394							}
1395							elsif (CORE::length($char[$i+1]) == 3) {
1396	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1397	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1398							}
1399							elsif (CORE::length($char[$i+1]) == 4) {
1400	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1401	0					0	push @range, chars3();
1402	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1403							}
1404							else {
1405	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1406							}
1407							}
1408							elsif (CORE::length($char[$i-1]) == 3) {
1409	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
1410	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
1411							}
1412							elsif (CORE::length($char[$i+1]) == 4) {
1413	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
1414	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1415							}
1416							else {
1417	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1418							}
1419							}
1420							elsif (CORE::length($char[$i-1]) == 4) {
1421	0	0				0	if (CORE::length($char[$i+1]) == 4) {
1422	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
1423							}
1424							else {
1425	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1426							}
1427							}
1428							else {
1429	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1430							}
1431
1432	0					0	splice @char, $i-1, 3, @range;
1433							}
1434
1435	0					0	return @char;
1436							}
1437
1438							#
1439							# Latin-9 open character class
1440							#
1441							sub _cc {
1442	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
1443	0					0	die __FILE__, ": subroutine cc got no parameter.";
1444							}
1445							elsif (scalar(@_) == 1) {
1446	0					0	return sprintf('\x%02X',$_[0]);
1447							}
1448							elsif (scalar(@_) == 2) {
1449	0	0				0	if ($_[0] > $_[1]) {
		0
		0
1450	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).";
1451							}
1452							elsif ($_[0] == $_[1]) {
1453	0					0	return sprintf('\x%02X',$_[0]);
1454							}
1455							elsif (($_[0]+1) == $_[1]) {
1456	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
1457							}
1458							else {
1459	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
1460							}
1461							}
1462							else {
1463	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).";
	0					0
1464							}
1465							}
1466
1467							#
1468							# Latin-9 octet range
1469							#
1470							sub _octets {
1471	0			0		0	my $length = shift @_;
1472
1473	0	0				0	if ($length == 1) {
1474	0					0	my($a1) = unpack 'C', $_[0];
1475	0					0	my($z1) = unpack 'C', $_[1];
1476
1477	0	0				0	if ($a1 > $z1) {
1478	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
1479							}
1480
1481	0	0				0	if ($a1 == $z1) {
		0
1482	0					0	return sprintf('\x%02X',$a1);
1483							}
1484							elsif (($a1+1) == $z1) {
1485	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
1486							}
1487							else {
1488	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
1489							}
1490							}
1491							else {
1492	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).";
1493							}
1494							}
1495
1496							#
1497							# Latin-9 range regexp
1498							#
1499							sub _range_regexp {
1500	0			0		0	my($length,$first,$last) = @_;
1501
1502	0					0	my @range_regexp = ();
1503	0	0				0	if (not exists $range_tr{$length}) {
1504	0					0	return @range_regexp;
1505							}
1506
1507	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
1508	0					0	while (my @range = splice(@ranges,0,$length)) {
1509	0					0	my $min = '';
1510	0					0	my $max = '';
1511	0					0	for (my $i=0; $i < $length; $i++) {
1512	0					0	$min .= pack 'C', $range[$i][0];
1513	0					0	$max .= pack 'C', $range[$i][-1];
1514							}
1515
1516							# min___max
1517							# FIRST_____________LAST
1518							# (nothing)
1519
1520	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
1521							}
1522
1523							# **********
1524							# min_________max
1525							# FIRST_____________LAST
1526							# **********
1527
1528							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
1529	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
1530							}
1531
1532							# **********************
1533							# min________________max
1534							# FIRST_____________LAST
1535							# **********************
1536
1537							elsif (($min eq $first) and ($max eq $last)) {
1538	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1539							}
1540
1541							# *********
1542							# min___max
1543							# FIRST_____________LAST
1544							# *********
1545
1546							elsif (($first le $min) and ($max le $last)) {
1547	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
1548							}
1549
1550							# **********************
1551							# min__________________________max
1552							# FIRST_____________LAST
1553							# **********************
1554
1555							elsif (($min le $first) and ($last le $max)) {
1556	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1557							}
1558
1559							# *********
1560							# min________max
1561							# FIRST_____________LAST
1562							# *********
1563
1564							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
1565	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
1566							}
1567
1568							# min___max
1569							# FIRST_____________LAST
1570							# (nothing)
1571
1572							elsif ($last lt $min) {
1573							}
1574
1575							else {
1576	0					0	die __FILE__, ": subroutine _range_regexp panic.";
1577							}
1578							}
1579
1580	0					0	return @range_regexp;
1581							}
1582
1583							#
1584							# Latin-9 open character list for qr and not qr
1585							#
1586							sub _charlist {
1587
1588	0			0		0	my $modifier = pop @_;
1589	0					0	my @char = @_;
1590
1591	0	0				0	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
1592
1593							# unescape character
1594	0					0	for (my $i=0; $i <= $#char; $i++) {
1595
1596							# escape - to ...
1597	0	0	0			0	if ($char[$i] eq '-') {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
1598	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1599	0					0	$char[$i] = '...';
1600							}
1601							}
1602
1603							# octal escape sequence
1604							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
1605	0					0	$char[$i] = octchr($1);
1606							}
1607
1608							# hexadecimal escape sequence
1609							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
1610	0					0	$char[$i] = hexchr($1);
1611							}
1612
1613							# \N{CHARNAME} --> N\{CHARNAME}
1614							elsif ($char[$i] =~ /\A \\ (N) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1615	0					0	$char[$i] = $1 . '\\' . $2;
1616							}
1617
1618							# \p{PROPERTY} --> p\{PROPERTY}
1619							elsif ($char[$i] =~ /\A \\ (p) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1620	0					0	$char[$i] = $1 . '\\' . $2;
1621							}
1622
1623							# \P{PROPERTY} --> P\{PROPERTY}
1624							elsif ($char[$i] =~ /\A \\ (P) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1625	0					0	$char[$i] = $1 . '\\' . $2;
1626							}
1627
1628							# \p, \P, \X --> p, P, X
1629							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
1630	0					0	$char[$i] = $1;
1631							}
1632
1633							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
1634	0					0	$char[$i] = CORE::chr oct $1;
1635							}
1636							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
1637	0					0	$char[$i] = CORE::chr hex $1;
1638							}
1639							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
1640	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
1641							}
1642							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
1643	0					0	$char[$i] = {
1644							'\0' => "\0",
1645							'\n' => "\n",
1646							'\r' => "\r",
1647							'\t' => "\t",
1648							'\f' => "\f",
1649							'\b' => "\x08", # \b means backspace in character class
1650							'\a' => "\a",
1651							'\e' => "\e",
1652							'\d' => '[0-9]',
1653
1654							# Vertical tabs are now whitespace
1655							# \s in a regex now matches a vertical tab in all circumstances.
1656							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1657							# \t \n \v \f \r space
1658							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1659							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1660							'\s' => '\s',
1661
1662							'\w' => '[0-9A-Z_a-z]',
1663							'\D' => '${Char::Elatin9::eD}',
1664							'\S' => '${Char::Elatin9::eS}',
1665							'\W' => '${Char::Elatin9::eW}',
1666
1667							'\H' => '${Char::Elatin9::eH}',
1668							'\V' => '${Char::Elatin9::eV}',
1669							'\h' => '[\x09\x20]',
1670							'\v' => '[\x0A\x0B\x0C\x0D]',
1671							'\R' => '${Char::Elatin9::eR}',
1672
1673							}->{$1};
1674							}
1675
1676							# POSIX-style character classes
1677							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
1678	0					0	$char[$i] = {
1679
1680							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
1681							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
1682							'[:^lower:]' => '${Char::Elatin9::not_lower_i}',
1683							'[:^upper:]' => '${Char::Elatin9::not_upper_i}',
1684
1685							}->{$1};
1686							}
1687							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
1688	0					0	$char[$i] = {
1689
1690							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
1691							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
1692							'[:ascii:]' => '[\x00-\x7F]',
1693							'[:blank:]' => '[\x09\x20]',
1694							'[:cntrl:]' => '[\x00-\x1F\x7F]',
1695							'[:digit:]' => '[\x30-\x39]',
1696							'[:graph:]' => '[\x21-\x7F]',
1697							'[:lower:]' => '[\x61-\x7A]',
1698							'[:print:]' => '[\x20-\x7F]',
1699							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
1700
1701							# P.174 POSIX-Style Character Classes
1702							# in Chapter 5: Pattern Matching
1703							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1704
1705							# P.311 11.2.4 Character Classes and other Special Escapes
1706							# in Chapter 11: perlre: Perl regular expressions
1707							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1708
1709							# P.210 POSIX-Style Character Classes
1710							# in Chapter 5: Pattern Matching
1711							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1712
1713							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
1714
1715							'[:upper:]' => '[\x41-\x5A]',
1716							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
1717							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
1718							'[:^alnum:]' => '${Char::Elatin9::not_alnum}',
1719							'[:^alpha:]' => '${Char::Elatin9::not_alpha}',
1720							'[:^ascii:]' => '${Char::Elatin9::not_ascii}',
1721							'[:^blank:]' => '${Char::Elatin9::not_blank}',
1722							'[:^cntrl:]' => '${Char::Elatin9::not_cntrl}',
1723							'[:^digit:]' => '${Char::Elatin9::not_digit}',
1724							'[:^graph:]' => '${Char::Elatin9::not_graph}',
1725							'[:^lower:]' => '${Char::Elatin9::not_lower}',
1726							'[:^print:]' => '${Char::Elatin9::not_print}',
1727							'[:^punct:]' => '${Char::Elatin9::not_punct}',
1728							'[:^space:]' => '${Char::Elatin9::not_space}',
1729							'[:^upper:]' => '${Char::Elatin9::not_upper}',
1730							'[:^word:]' => '${Char::Elatin9::not_word}',
1731							'[:^xdigit:]' => '${Char::Elatin9::not_xdigit}',
1732
1733							}->{$1};
1734							}
1735							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
1736	0					0	$char[$i] = $1;
1737							}
1738							}
1739
1740							# open character list
1741	0					0	my @singleoctet = ();
1742	0					0	my @multipleoctet = ();
1743	0					0	for (my $i=0; $i <= $#char; ) {
1744
1745							# escaped -
1746	0	0	0			0	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		0
		0
		0
		0
		0
1747	0					0	$i += 1;
1748	0					0	next;
1749							}
1750
1751							# make range regexp
1752							elsif ($char[$i] eq '...') {
1753
1754							# range error
1755	0	0				0	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		0
1756	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1757							}
1758							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
1759	0	0				0	if ($char[$i-1] gt $char[$i+1]) {
1760	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1761							}
1762							}
1763
1764							# make range regexp per length
1765	0					0	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
1766	0					0	my @regexp = ();
1767
1768							# is first and last
1769	0	0	0			0	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		0	0
		0
		0
1770	0					0	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
1771							}
1772
1773							# is first
1774							elsif ($length == CORE::length($char[$i-1])) {
1775	0					0	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
1776							}
1777
1778							# is inside in first and last
1779							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
1780	0					0	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
1781							}
1782
1783							# is last
1784							elsif ($length == CORE::length($char[$i+1])) {
1785	0					0	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
1786							}
1787
1788							else {
1789	0					0	die __FILE__, ": subroutine make_regexp panic.";
1790							}
1791
1792	0	0				0	if ($length == 1) {
1793	0					0	push @singleoctet, @regexp;
1794							}
1795							else {
1796	0					0	push @multipleoctet, @regexp;
1797							}
1798							}
1799
1800	0					0	$i += 2;
1801							}
1802
1803							# with /i modifier
1804							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1805	0	0				0	if ($modifier =~ /i/oxms) {
1806	0					0	my $uc = Char::Elatin9::uc($char[$i]);
1807	0					0	my $fc = Char::Elatin9::fc($char[$i]);
1808	0	0				0	if ($uc ne $fc) {
1809	0	0				0	if (CORE::length($fc) == 1) {
1810	0					0	push @singleoctet, $uc, $fc;
1811							}
1812							else {
1813	0					0	push @singleoctet, $uc;
1814	0					0	push @multipleoctet, $fc;
1815							}
1816							}
1817							else {
1818	0					0	push @singleoctet, $char[$i];
1819							}
1820							}
1821							else {
1822	0					0	push @singleoctet, $char[$i];
1823							}
1824	0					0	$i += 1;
1825							}
1826
1827							# single character of single octet code
1828							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
1829	0					0	push @singleoctet, "\t", "\x20";
1830	0					0	$i += 1;
1831							}
1832							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
1833	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
1834	0					0	$i += 1;
1835							}
1836							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
1837	0					0	push @singleoctet, $char[$i];
1838	0					0	$i += 1;
1839							}
1840
1841							# single character of multiple-octet code
1842							else {
1843	0					0	push @multipleoctet, $char[$i];
1844	0					0	$i += 1;
1845							}
1846							}
1847
1848							# quote metachar
1849	0					0	for (@singleoctet) {
1850	0	0				0	if ($_ eq '...') {
		0
		0
		0
		0
1851	0					0	$_ = '-';
1852							}
1853							elsif (/\A \n \z/oxms) {
1854	0					0	$_ = '\n';
1855							}
1856							elsif (/\A \r \z/oxms) {
1857	0					0	$_ = '\r';
1858							}
1859							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
1860	0					0	$_ = sprintf('\x%02X', CORE::ord $1);
1861							}
1862							elsif (/\A [\x00-\xFF] \z/oxms) {
1863	0					0	$_ = quotemeta $_;
1864							}
1865							}
1866
1867							# return character list
1868	0					0	return \@singleoctet, \@multipleoctet;
1869							}
1870
1871							#
1872							# Latin-9 octal escape sequence
1873							#
1874							sub octchr {
1875	0			0	0	0	my($octdigit) = @_;
1876
1877	0					0	my @binary = ();
1878	0					0	for my $octal (split(//,$octdigit)) {
1879	0					0	push @binary, {
1880							'0' => '000',
1881							'1' => '001',
1882							'2' => '010',
1883							'3' => '011',
1884							'4' => '100',
1885							'5' => '101',
1886							'6' => '110',
1887							'7' => '111',
1888							}->{$octal};
1889							}
1890	0					0	my $binary = join '', @binary;
1891
1892	0					0	my $octchr = {
1893							# 1234567
1894							1 => pack('B*', "0000000$binary"),
1895							2 => pack('B*', "000000$binary"),
1896							3 => pack('B*', "00000$binary"),
1897							4 => pack('B*', "0000$binary"),
1898							5 => pack('B*', "000$binary"),
1899							6 => pack('B*', "00$binary"),
1900							7 => pack('B*', "0$binary"),
1901							0 => pack('B*', "$binary"),
1902
1903							}->{CORE::length($binary) % 8};
1904
1905	0					0	return $octchr;
1906							}
1907
1908							#
1909							# Latin-9 hexadecimal escape sequence
1910							#
1911							sub hexchr {
1912	0			0	0	0	my($hexdigit) = @_;
1913
1914	0					0	my $hexchr = {
1915							1 => pack('H*', "0$hexdigit"),
1916							0 => pack('H*', "$hexdigit"),
1917
1918							}->{CORE::length($_[0]) % 2};
1919
1920	0					0	return $hexchr;
1921							}
1922
1923							#
1924							# Latin-9 open character list for qr
1925							#
1926							sub charlist_qr {
1927
1928	0			0	0	0	my $modifier = pop @_;
1929	0					0	my @char = @_;
1930
1931	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1932	0					0	my @singleoctet = @$singleoctet;
1933	0					0	my @multipleoctet = @$multipleoctet;
1934
1935							# return character list
1936	0	0				0	if (scalar(@singleoctet) >= 1) {
1937
1938							# with /i modifier
1939	0	0				0	if ($modifier =~ m/i/oxms) {
1940	0					0	my %singleoctet_ignorecase = ();
1941	0					0	for (@singleoctet) {
1942	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
1943	0					0	for my $ord (hex($1) .. hex($2)) {
1944	0					0	my $char = CORE::chr($ord);
1945	0					0	my $uc = Char::Elatin9::uc($char);
1946	0					0	my $fc = Char::Elatin9::fc($char);
1947	0	0				0	if ($uc eq $fc) {
1948	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
1949							}
1950							else {
1951	0	0				0	if (CORE::length($fc) == 1) {
1952	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1953	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
1954							}
1955							else {
1956	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1957	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
1958							}
1959							}
1960							}
1961							}
1962	0	0				0	if ($_ ne '') {
1963	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
1964							}
1965							}
1966	0					0	my $i = 0;
1967	0					0	my @singleoctet_ignorecase = ();
1968	0					0	for my $ord (0 .. 255) {
1969	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
1970	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
1971							}
1972							else {
1973	0					0	$i++;
1974							}
1975							}
1976	0					0	@singleoctet = ();
1977	0					0	for my $range (@singleoctet_ignorecase) {
1978	0	0				0	if (ref $range) {
1979	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
1980	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
1981							}
1982							elsif (scalar(@{$range}) == 2) {
1983	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1984							}
1985							else {
1986	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1987							}
1988							}
1989							}
1990							}
1991
1992	0					0	my $not_anchor = '';
1993
1994	0					0	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
1995							}
1996	0	0				0	if (scalar(@multipleoctet) >= 2) {
1997	0					0	return '(?:' . join('\|', @multipleoctet) . ')';
1998							}
1999							else {
2000	0					0	return $multipleoctet[0];
2001							}
2002							}
2003
2004							#
2005							# Latin-9 open character list for not qr
2006							#
2007							sub charlist_not_qr {
2008
2009	0			0	0	0	my $modifier = pop @_;
2010	0					0	my @char = @_;
2011
2012	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
2013	0					0	my @singleoctet = @$singleoctet;
2014	0					0	my @multipleoctet = @$multipleoctet;
2015
2016							# with /i modifier
2017	0	0				0	if ($modifier =~ m/i/oxms) {
2018	0					0	my %singleoctet_ignorecase = ();
2019	0					0	for (@singleoctet) {
2020	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
2021	0					0	for my $ord (hex($1) .. hex($2)) {
2022	0					0	my $char = CORE::chr($ord);
2023	0					0	my $uc = Char::Elatin9::uc($char);
2024	0					0	my $fc = Char::Elatin9::fc($char);
2025	0	0				0	if ($uc eq $fc) {
2026	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
2027							}
2028							else {
2029	0	0				0	if (CORE::length($fc) == 1) {
2030	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2031	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
2032							}
2033							else {
2034	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2035	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
2036							}
2037							}
2038							}
2039							}
2040	0	0				0	if ($_ ne '') {
2041	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
2042							}
2043							}
2044	0					0	my $i = 0;
2045	0					0	my @singleoctet_ignorecase = ();
2046	0					0	for my $ord (0 .. 255) {
2047	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
2048	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
2049							}
2050							else {
2051	0					0	$i++;
2052							}
2053							}
2054	0					0	@singleoctet = ();
2055	0					0	for my $range (@singleoctet_ignorecase) {
2056	0	0				0	if (ref $range) {
2057	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
2058	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
2059							}
2060							elsif (scalar(@{$range}) == 2) {
2061	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2062							}
2063							else {
2064	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2065							}
2066							}
2067							}
2068							}
2069
2070							# return character list
2071	0	0				0	if (scalar(@multipleoctet) >= 1) {
2072	0	0				0	if (scalar(@singleoctet) >= 1) {
2073
2074							# any character other than multiple-octet and single octet character class
2075	0					0	return '(?!' . join('\|', @multipleoctet) . ')(?:[^' . join('', @singleoctet) . '])';
2076							}
2077							else {
2078
2079							# any character other than multiple-octet character class
2080	0					0	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
2081							}
2082							}
2083							else {
2084	0	0				0	if (scalar(@singleoctet) >= 1) {
2085
2086							# any character other than single octet character class
2087	0					0	return '(?:[^' . join('', @singleoctet) . '])';
2088							}
2089							else {
2090
2091							# any character
2092	0					0	return "(?:$your_char)";
2093							}
2094							}
2095							}
2096
2097							#
2098							# open file in read mode
2099							#
2100							sub _open_r {
2101	177			177		620	my(undef,$file) = @_;
2102	177					872	$file =~ s#\A (\s) #./$1#oxms;
2103	177		33			18406	return eval(q{open($_[0],'<',$_[1])}) \|\|
2104							open($_[0],"< $file\0");
2105							}
2106
2107							#
2108							# open file in write mode
2109							#
2110							sub _open_w {
2111	0			0		0	my(undef,$file) = @_;
2112	0					0	$file =~ s#\A (\s) #./$1#oxms;
2113	0		0			0	return eval(q{open($_[0],'>',$_[1])}) \|\|
2114							open($_[0],"> $file\0");
2115							}
2116
2117							#
2118							# open file in append mode
2119							#
2120							sub _open_a {
2121	0			0		0	my(undef,$file) = @_;
2122	0					0	$file =~ s#\A (\s) #./$1#oxms;
2123	0		0			0	return eval(q{open($_[0],'>>',$_[1])}) \|\|
2124							open($_[0],">> $file\0");
2125							}
2126
2127							#
2128							# safe system
2129							#
2130							sub _systemx {
2131
2132							# P.707 29.2.33. exec
2133							# in Chapter 29: Functions
2134							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2135							#
2136							# Be aware that in older releases of Perl, exec (and system) did not flush
2137							# your output buffer, so you needed to enable command buffering by setting $\|
2138							# on one or more filehandles to avoid lost output in the case of exec, or
2139							# misordererd output in the case of system. This situation was largely remedied
2140							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
2141
2142							# P.855 exec
2143							# in Chapter 27: Functions
2144							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2145							#
2146							# In very old release of Perl (before v5.6), exec (and system) did not flush
2147							# your output buffer, so you needed to enable command buffering by setting $\|
2148							# on one or more filehandles to avoid lost output with exec or misordered
2149							# output with system.
2150
2151	177			177		731	$\| = 1;
2152
2153							# P.565 23.1.2. Cleaning Up Your Environment
2154							# in Chapter 23: Security
2155							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2156
2157							# P.656 Cleaning Up Your Environment
2158							# in Chapter 20: Security
2159							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2160
2161							# local $ENV{'PATH'} = '.';
2162	177					2313	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
2163
2164							# P.707 29.2.33. exec
2165							# in Chapter 29: Functions
2166							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2167							#
2168							# As we mentioned earlier, exec treats a discrete list of arguments as an
2169							# indication that it should bypass shell processing. However, there is one
2170							# place where you might still get tripped up. The exec call (and system, too)
2171							# will not distinguish between a single scalar argument and an array containing
2172							# only one element.
2173							#
2174							# @args = ("echo surprise"); # just one element in list
2175							# exec @args # still subject to shell escapes
2176							# or die "exec: $!"; # because @args == 1
2177							#
2178							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
2179							# first argument as the pathname, which forces the rest of the arguments to be
2180							# interpreted as a list, even if there is only one of them:
2181							#
2182							# exec { $args[0] } @args # safe even with one-argument list
2183							# or die "can't exec @args: $!";
2184
2185							# P.855 exec
2186							# in Chapter 27: Functions
2187							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2188							#
2189							# As we mentioned earlier, exec treats a discrete list of arguments as a
2190							# directive to bypass shell processing. However, there is one place where
2191							# you might still get tripped up. The exec call (and system, too) cannot
2192							# distinguish between a single scalar argument and an array containing
2193							# only one element.
2194							#
2195							# @args = ("echo surprise"); # just one element in list
2196							# exec @args # still subject to shell escapes
2197							# \|\| die "exec: $!"; # because @args == 1
2198							#
2199							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
2200							# argument as the pathname, which forces the rest of the arguments to be
2201							# interpreted as a list, even if there is only one of them:
2202							#
2203							# exec { $args[0] } @args # safe even with one-argument list
2204							# \|\| die "can't exec @args: $!";
2205
2206	177					576	return CORE::system { $_[0] } @_; # safe even with one-argument list
	177					13771490
2207							}
2208
2209							#
2210							# Latin-9 order to character (with parameter)
2211							#
2212							sub Char::Elatin9::chr(;$) {
2213
2214	0	0		0	0		my $c = @_ ? $_[0] : $_;
2215
2216	0	0					if ($c == 0x00) {
2217	0						return "\x00";
2218							}
2219							else {
2220	0						my @chr = ();
2221	0						while ($c > 0) {
2222	0						unshift @chr, ($c % 0x100);
2223	0						$c = int($c / 0x100);
2224							}
2225	0						return pack 'C*', @chr;
2226							}
2227							}
2228
2229							#
2230							# Latin-9 order to character (without parameter)
2231							#
2232							sub Char::Elatin9::chr_() {
2233
2234	0			0	0		my $c = $_;
2235
2236	0	0					if ($c == 0x00) {
2237	0						return "\x00";
2238							}
2239							else {
2240	0						my @chr = ();
2241	0						while ($c > 0) {
2242	0						unshift @chr, ($c % 0x100);
2243	0						$c = int($c / 0x100);
2244							}
2245	0						return pack 'C*', @chr;
2246							}
2247							}
2248
2249							#
2250							# Latin-9 path globbing (with parameter)
2251							#
2252							sub Char::Elatin9::glob($) {
2253
2254	0	0		0	0		if (wantarray) {
2255	0						my @glob = _DOS_like_glob(@_);
2256	0						for my $glob (@glob) {
2257	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2258							}
2259	0						return @glob;
2260							}
2261							else {
2262	0						my $glob = _DOS_like_glob(@_);
2263	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2264	0						return $glob;
2265							}
2266							}
2267
2268							#
2269							# Latin-9 path globbing (without parameter)
2270							#
2271							sub Char::Elatin9::glob_() {
2272
2273	0	0		0	0		if (wantarray) {
2274	0						my @glob = _DOS_like_glob();
2275	0						for my $glob (@glob) {
2276	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2277							}
2278	0						return @glob;
2279							}
2280							else {
2281	0						my $glob = _DOS_like_glob();
2282	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2283	0						return $glob;
2284							}
2285							}
2286
2287							#
2288							# Latin-9 path globbing via File::DosGlob 1.10
2289							#
2290							# Often I confuse "_dosglob" and "_doglob".
2291							# So, I renamed "_dosglob" to "_DOS_like_glob".
2292							#
2293							my %iter;
2294							my %entries;
2295							sub _DOS_like_glob {
2296
2297							# context (keyed by second cxix argument provided by core)
2298	0			0			my($expr,$cxix) = @_;
2299
2300							# glob without args defaults to $_
2301	0	0					$expr = $_ if not defined $expr;
2302
2303							# represents the current user's home directory
2304							#
2305							# 7.3. Expanding Tildes in Filenames
2306							# in Chapter 7. File Access
2307							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2308							#
2309							# and File::HomeDir, File::HomeDir::Windows module
2310
2311							# DOS-like system
2312	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2313	0						$expr =~ s{ \A ~ (?= [^/\\] ) }
2314	0						{ my_home_MSWin32() }oxmse;
2315							}
2316
2317							# UNIX-like system
2318							else {
2319	0						$expr =~ s{ \A ~ ( (?:[^/])* ) }
2320	0	0	0				{ $1 ? (eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
2321							}
2322
2323							# assume global context if not provided one
2324	0	0					$cxix = '_G_' if not defined $cxix;
2325	0	0					$iter{$cxix} = 0 if not exists $iter{$cxix};
2326
2327							# if we're just beginning, do it all first
2328	0	0					if ($iter{$cxix} == 0) {
2329	0						$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
2330							}
2331
2332							# chuck it all out, quick or slow
2333	0	0					if (wantarray) {
2334	0						delete $iter{$cxix};
2335	0						return @{delete $entries{$cxix}};
	0
2336							}
2337							else {
2338	0	0					if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0
2339	0						return shift @{$entries{$cxix}};
	0
2340							}
2341							else {
2342							# return undef for EOL
2343	0						delete $iter{$cxix};
2344	0						delete $entries{$cxix};
2345	0						return undef;
2346							}
2347							}
2348							}
2349
2350							#
2351							# Latin-9 path globbing subroutine
2352							#
2353							sub _do_glob {
2354
2355	0			0			my($cond,@expr) = @_;
2356	0						my @glob = ();
2357	0						my $fix_drive_relative_paths = 0;
2358
2359							OUTER:
2360	0						for my $expr (@expr) {
2361	0	0					next OUTER if not defined $expr;
2362	0	0					next OUTER if $expr eq '';
2363
2364	0						my @matched = ();
2365	0						my @globdir = ();
2366	0						my $head = '.';
2367	0						my $pathsep = '/';
2368	0						my $tail;
2369
2370							# if argument is within quotes strip em and do no globbing
2371	0	0					if ($expr =~ /\A " ((?:$q_char)*) " \z/oxms) {
2372	0						$expr = $1;
2373	0	0					if ($cond eq 'd') {
2374	0	0					if (-d $expr) {
2375	0						push @glob, $expr;
2376							}
2377							}
2378							else {
2379	0	0					if (-e $expr) {
2380	0						push @glob, $expr;
2381							}
2382							}
2383	0						next OUTER;
2384							}
2385
2386							# wildcards with a drive prefix such as h:*.pm must be changed
2387							# to h:./*.pm to expand correctly
2388	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2389	0	0					if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
2390	0						$fix_drive_relative_paths = 1;
2391							}
2392							}
2393
2394	0	0					if (($head, $tail) = _parse_path($expr,$pathsep)) {
2395	0	0					if ($tail eq '') {
2396	0						push @glob, $expr;
2397	0						next OUTER;
2398							}
2399	0	0					if ($head =~ / \A (?:$q_char)? [?] /oxms) {
2400	0	0					if (@globdir = _do_glob('d', $head)) {
2401	0						push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0
2402	0						next OUTER;
2403							}
2404							}
2405	0	0	0				if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
2406	0						$head .= $pathsep;
2407							}
2408	0						$expr = $tail;
2409							}
2410
2411							# If file component has no wildcards, we can avoid opendir
2412	0	0					if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
2413	0	0					if ($head eq '.') {
2414	0						$head = '';
2415							}
2416	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2417	0						$head .= $pathsep;
2418							}
2419	0						$head .= $expr;
2420	0	0					if ($cond eq 'd') {
2421	0	0					if (-d $head) {
2422	0						push @glob, $head;
2423							}
2424							}
2425							else {
2426	0	0					if (-e $head) {
2427	0						push @glob, $head;
2428							}
2429							}
2430	0						next OUTER;
2431							}
2432	0	0					opendir(*DIR, $head) or next OUTER;
2433	0						my @leaf = readdir DIR;
2434	0						closedir DIR;
2435
2436	0	0					if ($head eq '.') {
2437	0						$head = '';
2438							}
2439	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2440	0						$head .= $pathsep;
2441							}
2442
2443	0						my $pattern = '';
2444	0						while ($expr =~ / \G ($q_char) /oxgc) {
2445	0						my $char = $1;
2446
2447							# 6.9. Matching Shell Globs as Regular Expressions
2448							# in Chapter 6. Pattern Matching
2449							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2450							# (and so on)
2451
2452	0	0					if ($char eq '*') {
		0
		0
2453	0						$pattern .= "(?:$your_char)*",
2454							}
2455							elsif ($char eq '?') {
2456	0						$pattern .= "(?:$your_char)?", # DOS style
2457							# $pattern .= "(?:$your_char)", # UNIX style
2458							}
2459							elsif ((my $fc = Char::Elatin9::fc($char)) ne $char) {
2460	0						$pattern .= $fc;
2461							}
2462							else {
2463	0						$pattern .= quotemeta $char;
2464							}
2465							}
2466	0			0			my $matchsub = sub { Char::Elatin9::fc($_[0]) =~ /\A $pattern \z/xms };
	0
2467
2468							# if ($@) {
2469							# print STDERR "$0: $@\n";
2470							# next OUTER;
2471							# }
2472
2473							INNER:
2474	0						for my $leaf (@leaf) {
2475	0	0	0				if ($leaf eq '.' or $leaf eq '..') {
2476	0						next INNER;
2477							}
2478	0	0	0				if ($cond eq 'd' and not -d "$head$leaf") {
2479	0						next INNER;
2480							}
2481
2482	0	0					if (&$matchsub($leaf)) {
2483	0						push @matched, "$head$leaf";
2484	0						next INNER;
2485							}
2486
2487							# [DOS compatibility special case]
2488							# Failed, add a trailing dot and try again, but only...
2489
2490	0	0	0				if (Char::Elatin9::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
2491							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
2492							Char::Elatin9::index($pattern,'\\.') != -1 # pattern has a dot.
2493							) {
2494	0	0					if (&$matchsub("$leaf.")) {
2495	0						push @matched, "$head$leaf";
2496	0						next INNER;
2497							}
2498							}
2499							}
2500	0	0					if (@matched) {
2501	0						push @glob, @matched;
2502							}
2503							}
2504	0	0					if ($fix_drive_relative_paths) {
2505	0						for my $glob (@glob) {
2506	0						$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
2507							}
2508							}
2509	0						return @glob;
2510							}
2511
2512							#
2513							# Latin-9 parse line
2514							#
2515							sub _parse_line {
2516
2517	0			0			my($line) = @_;
2518
2519	0						$line .= ' ';
2520	0						my @piece = ();
2521	0						while ($line =~ /
2522							" ( (?: [^"] )* ) " \s+ \|
2523							( (?: [^"\s] )* ) \s+
2524							/oxmsg
2525							) {
2526	0	0					push @piece, defined($1) ? $1 : $2;
2527							}
2528	0						return @piece;
2529							}
2530
2531							#
2532							# Latin-9 parse path
2533							#
2534							sub _parse_path {
2535
2536	0			0			my($path,$pathsep) = @_;
2537
2538	0						$path .= '/';
2539	0						my @subpath = ();
2540	0						while ($path =~ /
2541							((?: [^\/\\] )+?) [\/\\]
2542							/oxmsg
2543							) {
2544	0						push @subpath, $1;
2545							}
2546
2547	0						my $tail = pop @subpath;
2548	0						my $head = join $pathsep, @subpath;
2549	0						return $head, $tail;
2550							}
2551
2552							#
2553							# via File::HomeDir::Windows 1.00
2554							#
2555							sub my_home_MSWin32 {
2556
2557							# A lot of unix people and unix-derived tools rely on
2558							# the ability to overload HOME. We will support it too
2559							# so that they can replace raw HOME calls with File::HomeDir.
2560	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
2561	0						return $ENV{'HOME'};
2562							}
2563
2564							# Do we have a user profile?
2565							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
2566	0						return $ENV{'USERPROFILE'};
2567							}
2568
2569							# Some Windows use something like $ENV{'HOME'}
2570							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
2571	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
2572							}
2573
2574	0						return undef;
2575							}
2576
2577							#
2578							# via File::HomeDir::Unix 1.00
2579							#
2580							sub my_home {
2581	0			0	0		my $home;
2582
2583	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
2584	0						$home = $ENV{'HOME'};
2585							}
2586
2587							# This is from the original code, but I'm guessing
2588							# it means "login directory" and exists on some Unixes.
2589							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
2590	0						$home = $ENV{'LOGDIR'};
2591							}
2592
2593							### More-desperate methods
2594
2595							# Light desperation on any (Unixish) platform
2596							else {
2597	0						$home = eval q{ (getpwuid($<))[7] };
2598							}
2599
2600							# On Unix in general, a non-existant home means "no home"
2601							# For example, "nobody"-like users might use /nonexistant
2602	0	0	0				if (defined $home and ! -d($home)) {
2603	0						$home = undef;
2604							}
2605	0						return $home;
2606							}
2607
2608							#
2609							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
2610							#
2611							sub Char::Elatin9::PREMATCH {
2612	0			0	0		return $`;
2613							}
2614
2615							#
2616							# ${^MATCH}, $MATCH, $& the string that matched
2617							#
2618							sub Char::Elatin9::MATCH {
2619	0			0	0		return $&;
2620							}
2621
2622							#
2623							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
2624							#
2625							sub Char::Elatin9::POSTMATCH {
2626	0			0	0		return $';
2627							}
2628
2629							#
2630							# Latin-9 character to order (with parameter)
2631							#
2632							sub Char::Latin9::ord(;$) {
2633
2634	0	0		0	1		local $_ = shift if @_;
2635
2636	0	0					if (/\A ($q_char) /oxms) {
2637	0						my @ord = unpack 'C*', $1;
2638	0						my $ord = 0;
2639	0						while (my $o = shift @ord) {
2640	0						$ord = $ord * 0x100 + $o;
2641							}
2642	0						return $ord;
2643							}
2644							else {
2645	0						return CORE::ord $_;
2646							}
2647							}
2648
2649							#
2650							# Latin-9 character to order (without parameter)
2651							#
2652							sub Char::Latin9::ord_() {
2653
2654	0	0		0	0		if (/\A ($q_char) /oxms) {
2655	0						my @ord = unpack 'C*', $1;
2656	0						my $ord = 0;
2657	0						while (my $o = shift @ord) {
2658	0						$ord = $ord * 0x100 + $o;
2659							}
2660	0						return $ord;
2661							}
2662							else {
2663	0						return CORE::ord $_;
2664							}
2665							}
2666
2667							#
2668							# Latin-9 reverse
2669							#
2670							sub Char::Latin9::reverse(@) {
2671
2672	0	0		0	0		if (wantarray) {
2673	0						return CORE::reverse @_;
2674							}
2675							else {
2676
2677							# One of us once cornered Larry in an elevator and asked him what
2678							# problem he was solving with this, but he looked as far off into
2679							# the distance as he could in an elevator and said, "It seemed like
2680							# a good idea at the time."
2681
2682	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
2683							}
2684							}
2685
2686							#
2687							# Latin-9 getc (with parameter, without parameter)
2688							#
2689							sub Char::Latin9::getc(;*@) {
2690
2691	0			0	0		my($package) = caller;
2692	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
2693	0	0	0				croak 'Too many arguments for Char::Latin9::getc' if @_ and not wantarray;
2694
2695	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
2696	0						my $getc = '';
2697	0						for my $length ($length[0] .. $length[-1]) {
2698	0						$getc .= CORE::getc($fh);
2699	0	0					if (exists $range_tr{CORE::length($getc)}) {
2700	0	0					if ($getc =~ /\A ${Char::Elatin9::dot_s} \z/oxms) {
2701	0	0					return wantarray ? ($getc,@_) : $getc;
2702							}
2703							}
2704							}
2705	0	0					return wantarray ? ($getc,@_) : $getc;
2706							}
2707
2708							#
2709							# Latin-9 length by character
2710							#
2711							sub Char::Latin9::length(;$) {
2712
2713	0	0		0	1		local $_ = shift if @_;
2714
2715	0						local @_ = /\G ($q_char) /oxmsg;
2716	0						return scalar @_;
2717							}
2718
2719							#
2720							# Latin-9 substr by character
2721							#
2722							BEGIN {
2723
2724							# P.232 The lvalue Attribute
2725							# in Chapter 6: Subroutines
2726							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2727
2728							# P.336 The lvalue Attribute
2729							# in Chapter 7: Subroutines
2730							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2731
2732							# P.144 8.4 Lvalue subroutines
2733							# in Chapter 8: perlsub: Perl subroutines
2734							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2735
2736	177	50	0	177	1	347163	eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
2737							# vv----------------*******
2738							sub Char::Latin9::substr($$;$$) %s {
2739
2740							my @char = $_[0] =~ /\G ($q_char) /oxmsg;
2741
2742							# If the substring is beyond either end of the string, substr() returns the undefined
2743							# value and produces a warning. When used as an lvalue, specifying a substring that
2744							# is entirely outside the string raises an exception.
2745							# http://perldoc.perl.org/functions/substr.html
2746
2747							# A return with no argument returns the scalar value undef in scalar context,
2748							# an empty list () in list context, and (naturally) nothing at all in void
2749							# context.
2750
2751							my $offset = $_[1];
2752							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
2753							return;
2754							}
2755
2756							# substr($string,$offset,$length,$replacement)
2757							if (@_ == 4) {
2758							my(undef,undef,$length,$replacement) = @_;
2759							my $substr = join '', splice(@char, $offset, $length, $replacement);
2760							$_[0] = join '', @char;
2761
2762							# return $substr; this doesn't work, don't say "return"
2763							$substr;
2764							}
2765
2766							# substr($string,$offset,$length)
2767							elsif (@_ == 3) {
2768							my(undef,undef,$length) = @_;
2769							my $octet_offset = 0;
2770							my $octet_length = 0;
2771							if ($offset == 0) {
2772							$octet_offset = 0;
2773							}
2774							elsif ($offset > 0) {
2775							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2776							}
2777							else {
2778							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2779							}
2780							if ($length == 0) {
2781							$octet_length = 0;
2782							}
2783							elsif ($length > 0) {
2784							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
2785							}
2786							else {
2787							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
2788							}
2789							CORE::substr($_[0], $octet_offset, $octet_length);
2790							}
2791
2792							# substr($string,$offset)
2793							else {
2794							my $octet_offset = 0;
2795							if ($offset == 0) {
2796							$octet_offset = 0;
2797							}
2798							elsif ($offset > 0) {
2799							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2800							}
2801							else {
2802							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2803							}
2804							CORE::substr($_[0], $octet_offset);
2805							}
2806							}
2807							END
2808							}
2809
2810							#
2811							# Latin-9 index by character
2812							#
2813							sub Char::Latin9::index($$;$) {
2814
2815	0			0	1		my $index;
2816	0	0					if (@_ == 3) {
2817	0						$index = Char::Elatin9::index($_[0], $_[1], CORE::length(Char::Latin9::substr($_[0], 0, $_[2])));
2818							}
2819							else {
2820	0						$index = Char::Elatin9::index($_[0], $_[1]);
2821							}
2822
2823	0	0					if ($index == -1) {
2824	0						return -1;
2825							}
2826							else {
2827	0						return Char::Latin9::length(CORE::substr $_[0], 0, $index);
2828							}
2829							}
2830
2831							#
2832							# Latin-9 rindex by character
2833							#
2834							sub Char::Latin9::rindex($$;$) {
2835
2836	0			0	1		my $rindex;
2837	0	0					if (@_ == 3) {
2838	0						$rindex = Char::Elatin9::rindex($_[0], $_[1], CORE::length(Char::Latin9::substr($_[0], 0, $_[2])));
2839							}
2840							else {
2841	0						$rindex = Char::Elatin9::rindex($_[0], $_[1]);
2842							}
2843
2844	0	0					if ($rindex == -1) {
2845	0						return -1;
2846							}
2847							else {
2848	0						return Char::Latin9::length(CORE::substr $_[0], 0, $rindex);
2849							}
2850							}
2851
2852							#
2853							# instead of Carp::carp
2854							#
2855							sub carp {
2856	0			0	0		my($package,$filename,$line) = caller(1);
2857	0						print STDERR "@_ at $filename line $line.\n";
2858							}
2859
2860							#
2861							# instead of Carp::croak
2862							#
2863							sub croak {
2864	0			0	0		my($package,$filename,$line) = caller(1);
2865	0						print STDERR "@_ at $filename line $line.\n";
2866	0						die "\n";
2867							}
2868
2869							#
2870							# instead of Carp::cluck
2871							#
2872							sub cluck {
2873	0			0	0		my $i = 0;
2874	0						my @cluck = ();
2875	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2876	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
2877	0						$i++;
2878							}
2879	0						print STDERR CORE::reverse @cluck;
2880	0						print STDERR "\n";
2881	0						carp @_;
2882							}
2883
2884							#
2885							# instead of Carp::confess
2886							#
2887							sub confess {
2888	0			0	0		my $i = 0;
2889	0						my @confess = ();
2890	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2891	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
2892	0						$i++;
2893							}
2894	0						print STDERR CORE::reverse @confess;
2895	0						print STDERR "\n";
2896	0						croak @_;
2897							}
2898
2899							1;
2900
2901							__END__