File Coverage

Char/Elatin4.pm

Criterion	Covered	Total	%
statement	51	954	5.3
branch	4	562	0.7
condition	1	180	0.5
subroutine	20	85	23.5
pod	7	50	14.0
total	83	1831	4.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# This file is * FOR CPAN USE ONLY *.
3							#
4							package Char::Elatin4;
5							######################################################################
6							#
7							# Char::Elatin4 - Run-time routines for Char/Latin4.pm
8							#
9							# http://search.cpan.org/dist/Char-Latin4/
10							#
11							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 INABA Hitoshi
12							######################################################################
13
14	177			177		5869	use 5.00503; # Galapagos Consensus 1998 for primetools
	177					1136
	177					10135
15							# use 5.008001; # Lancaster Consensus 2013 for toolchains
16
17							# 12.3. Delaying use Until Runtime
18							# in Chapter 12. Packages, Libraries, and Modules
19							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
20							# (and so on)
21
22	177			177		13818	BEGIN { eval q{ use vars qw($VERSION) } }
	177			177		1135
	177					437
	177					37825
23							$VERSION = sprintf '%d.%02d', q$Revision: 1.00 $ =~ /(\d+)/xmsg;
24
25							BEGIN {
26	177	50		177		1127	if ($^X =~ / jperl /oxmsi) {
27	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)";
28							}
29	177					310	if (CORE::ord('A') == 193) {
30							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).";
31							}
32	177					39057	if (CORE::ord('A') != 0x41) {
33							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).";
34							}
35							}
36
37							BEGIN {
38
39							# instead of utf8.pm
40	177			177		11901	eval q{
	177			177		1173
	177			70		347
	177					36424
	70					18675
	59					11918
	52					10583
	54					12772
	56					10452
	63					13704
41							no warnings qw(redefine);
42							*utf8::upgrade = sub { CORE::length $_[0] };
43							*utf8::downgrade = sub { 1 };
44							*utf8::encode = sub { };
45							*utf8::decode = sub { 1 };
46							*utf8::is_utf8 = sub { };
47							*utf8::valid = sub { 1 };
48							};
49	177	50				164720	if ($@) {
50	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
51	0					0	*utf8::downgrade = sub { 1 };
	0					0
52	0					0	*utf8::encode = sub { };
	0					0
53	0					0	*utf8::decode = sub { 1 };
	0					0
54	0					0	*utf8::is_utf8 = sub { };
	0					0
55	0					0	*utf8::valid = sub { 1 };
	0					0
56							}
57							}
58
59							# instead of Symbol.pm
60							BEGIN {
61	177			177		1036	my $genpkg = "Symbol::";
62	177					9930	my $genseq = 0;
63
64							sub gensym () {
65	0			0	0	0	my $name = "GEN" . $genseq++;
66
67							# here, no strict qw(refs); if strict.pm exists
68
69	0					0	my $ref = \*{$genpkg . $name};
	0					0
70	0					0	delete $$genpkg{$name};
71	0					0	return $ref;
72							}
73
74							sub qualify ($;$) {
75	0			0	0	0	my ($name) = @_;
76	0	0	0			0	if (!ref($name) && (Char::Elatin4::index($name, '::') == -1) && (Char::Elatin4::index($name, "'") == -1)) {
			0
77	0					0	my $pkg;
78	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
79
80							# Global names: special character, "^xyz", or other.
81	0	0	0			0	if ($name =~ /^(([^a-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
82							# RGS 2001-11-05 : translate leading ^X to control-char
83	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
84	0					0	$pkg = "main";
85							}
86							else {
87	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
88							}
89	0					0	$name = $pkg . "::" . $name;
90							}
91	0					0	return $name;
92							}
93
94							sub qualify_to_ref ($;$) {
95
96							# here, no strict qw(refs); if strict.pm exists
97
98	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
99							}
100							}
101
102							# Column: local $@
103							# in Chapter 9. Osaete okitai Perl no kiso
104							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
105							# (and so on)
106
107							# use strict; if strict.pm exists
108							BEGIN {
109	177	50		177		498	if (eval { local $@; CORE::require strict }) {
	177					754
	177					2494
110	177					51194	strict::->import;
111							}
112							}
113
114							# P.714 29.2.39. flock
115							# in Chapter 29: Functions
116							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
117
118							# P.863 flock
119							# in Chapter 27: Functions
120							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
121
122							sub LOCK_SH() {1}
123							sub LOCK_EX() {2}
124							sub LOCK_UN() {8}
125							sub LOCK_NB() {4}
126
127							# instead of Carp.pm
128							sub carp;
129							sub croak;
130							sub cluck;
131							sub confess;
132
133							my $your_char = q{[\x00-\xFF]};
134
135							# regexp of character
136	177			177		13975	BEGIN { eval q{ use vars qw($q_char) } }
	177			177		1722
	177					396
	177					18623
137							$q_char = qr/$your_char/oxms;
138
139							#
140							# Latin-4 character range per length
141							#
142							my %range_tr = ();
143
144							#
145							# alias of encoding name
146							#
147	177			177		11507	BEGIN { eval q{ use vars qw($encoding_alias) } }
	177			177		1988
	177					591
	177					539857
148
149							#
150							# Latin-4 case conversion
151							#
152							my %lc = ();
153							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
154							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
155							my %uc = ();
156							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
157							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
158							my %fc = ();
159							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
160							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
161
162							if (0) {
163							}
164
165							elsif (__PACKAGE__ =~ / \b Elatin4 \z/oxms) {
166							%range_tr = (
167							1 => [ [0x00..0xFF],
168							],
169							);
170							$encoding_alias = qr/ \b (?: iso[-_ ]?8859-4 \| iec[- ]?8859-4 \| latin-?4 ) \b /oxmsi;
171
172							%lc = (%lc,
173							"\xA1" => "\xB1", # LATIN LETTER A WITH OGONEK
174							"\xA3" => "\xB3", # LATIN LETTER R WITH CEDILLA
175							"\xA5" => "\xB5", # LATIN LETTER I WITH TILDE
176							"\xA6" => "\xB6", # LATIN LETTER L WITH CEDILLA
177							"\xA9" => "\xB9", # LATIN LETTER S WITH CARON
178							"\xAA" => "\xBA", # LATIN LETTER E WITH MACRON
179							"\xAB" => "\xBB", # LATIN LETTER G WITH CEDILLA
180							"\xAC" => "\xBC", # LATIN LETTER T WITH STROKE
181							"\xAE" => "\xBE", # LATIN LETTER Z WITH CARON
182							"\xBD" => "\xBF", # LATIN LETTER ENG
183							"\xC0" => "\xE0", # LATIN LETTER A WITH MACRON
184							"\xC1" => "\xE1", # LATIN LETTER A WITH ACUTE
185							"\xC2" => "\xE2", # LATIN LETTER A WITH CIRCUMFLEX
186							"\xC3" => "\xE3", # LATIN LETTER A WITH TILDE
187							"\xC4" => "\xE4", # LATIN LETTER A WITH DIAERESIS
188							"\xC5" => "\xE5", # LATIN LETTER A WITH RING ABOVE
189							"\xC6" => "\xE6", # LATIN LETTER AE
190							"\xC7" => "\xE7", # LATIN LETTER I WITH OGONEK
191							"\xC8" => "\xE8", # LATIN LETTER C WITH CARON
192							"\xC9" => "\xE9", # LATIN LETTER E WITH ACUTE
193							"\xCA" => "\xEA", # LATIN LETTER E WITH OGONEK
194							"\xCB" => "\xEB", # LATIN LETTER E WITH DIAERESIS
195							"\xCC" => "\xEC", # LATIN LETTER E WITH DOT ABOVE
196							"\xCD" => "\xED", # LATIN LETTER I WITH ACUTE
197							"\xCE" => "\xEE", # LATIN LETTER I WITH CIRCUMFLEX
198							"\xCF" => "\xEF", # LATIN LETTER I WITH MACRON
199							"\xD0" => "\xF0", # LATIN LETTER D WITH STROKE
200							"\xD1" => "\xF1", # LATIN LETTER N WITH CEDILLA
201							"\xD2" => "\xF2", # LATIN LETTER O WITH MACRON
202							"\xD3" => "\xF3", # LATIN LETTER K WITH CEDILLA
203							"\xD4" => "\xF4", # LATIN LETTER O WITH CIRCUMFLEX
204							"\xD5" => "\xF5", # LATIN LETTER O WITH TILDE
205							"\xD6" => "\xF6", # LATIN LETTER O WITH DIAERESIS
206							"\xD8" => "\xF8", # LATIN LETTER O WITH STROKE
207							"\xD9" => "\xF9", # LATIN LETTER U WITH OGONEK
208							"\xDA" => "\xFA", # LATIN LETTER U WITH ACUTE
209							"\xDB" => "\xFB", # LATIN LETTER U WITH CIRCUMFLEX
210							"\xDC" => "\xFC", # LATIN LETTER U WITH DIAERESIS
211							"\xDD" => "\xFD", # LATIN LETTER U WITH TILDE
212							"\xDE" => "\xFE", # LATIN LETTER U WITH MACRON
213							);
214
215							%uc = (%uc,
216							"\xB1" => "\xA1", # LATIN LETTER A WITH OGONEK
217							"\xB3" => "\xA3", # LATIN LETTER R WITH CEDILLA
218							"\xB5" => "\xA5", # LATIN LETTER I WITH TILDE
219							"\xB6" => "\xA6", # LATIN LETTER L WITH CEDILLA
220							"\xB9" => "\xA9", # LATIN LETTER S WITH CARON
221							"\xBA" => "\xAA", # LATIN LETTER E WITH MACRON
222							"\xBB" => "\xAB", # LATIN LETTER G WITH CEDILLA
223							"\xBC" => "\xAC", # LATIN LETTER T WITH STROKE
224							"\xBE" => "\xAE", # LATIN LETTER Z WITH CARON
225							"\xBF" => "\xBD", # LATIN LETTER ENG
226							"\xE0" => "\xC0", # LATIN LETTER A WITH MACRON
227							"\xE1" => "\xC1", # LATIN LETTER A WITH ACUTE
228							"\xE2" => "\xC2", # LATIN LETTER A WITH CIRCUMFLEX
229							"\xE3" => "\xC3", # LATIN LETTER A WITH TILDE
230							"\xE4" => "\xC4", # LATIN LETTER A WITH DIAERESIS
231							"\xE5" => "\xC5", # LATIN LETTER A WITH RING ABOVE
232							"\xE6" => "\xC6", # LATIN LETTER AE
233							"\xE7" => "\xC7", # LATIN LETTER I WITH OGONEK
234							"\xE8" => "\xC8", # LATIN LETTER C WITH CARON
235							"\xE9" => "\xC9", # LATIN LETTER E WITH ACUTE
236							"\xEA" => "\xCA", # LATIN LETTER E WITH OGONEK
237							"\xEB" => "\xCB", # LATIN LETTER E WITH DIAERESIS
238							"\xEC" => "\xCC", # LATIN LETTER E WITH DOT ABOVE
239							"\xED" => "\xCD", # LATIN LETTER I WITH ACUTE
240							"\xEE" => "\xCE", # LATIN LETTER I WITH CIRCUMFLEX
241							"\xEF" => "\xCF", # LATIN LETTER I WITH MACRON
242							"\xF0" => "\xD0", # LATIN LETTER D WITH STROKE
243							"\xF1" => "\xD1", # LATIN LETTER N WITH CEDILLA
244							"\xF2" => "\xD2", # LATIN LETTER O WITH MACRON
245							"\xF3" => "\xD3", # LATIN LETTER K WITH CEDILLA
246							"\xF4" => "\xD4", # LATIN LETTER O WITH CIRCUMFLEX
247							"\xF5" => "\xD5", # LATIN LETTER O WITH TILDE
248							"\xF6" => "\xD6", # LATIN LETTER O WITH DIAERESIS
249							"\xF8" => "\xD8", # LATIN LETTER O WITH STROKE
250							"\xF9" => "\xD9", # LATIN LETTER U WITH OGONEK
251							"\xFA" => "\xDA", # LATIN LETTER U WITH ACUTE
252							"\xFB" => "\xDB", # LATIN LETTER U WITH CIRCUMFLEX
253							"\xFC" => "\xDC", # LATIN LETTER U WITH DIAERESIS
254							"\xFD" => "\xDD", # LATIN LETTER U WITH TILDE
255							"\xFE" => "\xDE", # LATIN LETTER U WITH MACRON
256							);
257
258							%fc = (%fc,
259							"\xA1" => "\xB1", # LATIN CAPITAL LETTER A WITH OGONEK --> LATIN SMALL LETTER A WITH OGONEK
260							"\xA3" => "\xB3", # LATIN CAPITAL LETTER R WITH CEDILLA --> LATIN SMALL LETTER R WITH CEDILLA
261							"\xA5" => "\xB5", # LATIN CAPITAL LETTER I WITH TILDE --> LATIN SMALL LETTER I WITH TILDE
262							"\xA6" => "\xB6", # LATIN CAPITAL LETTER L WITH CEDILLA --> LATIN SMALL LETTER L WITH CEDILLA
263							"\xA9" => "\xB9", # LATIN CAPITAL LETTER S WITH CARON --> LATIN SMALL LETTER S WITH CARON
264							"\xAA" => "\xBA", # LATIN CAPITAL LETTER E WITH MACRON --> LATIN SMALL LETTER E WITH MACRON
265							"\xAB" => "\xBB", # LATIN CAPITAL LETTER G WITH CEDILLA --> LATIN SMALL LETTER G WITH CEDILLA
266							"\xAC" => "\xBC", # LATIN CAPITAL LETTER T WITH STROKE --> LATIN SMALL LETTER T WITH STROKE
267							"\xAE" => "\xBE", # LATIN CAPITAL LETTER Z WITH CARON --> LATIN SMALL LETTER Z WITH CARON
268							"\xBD" => "\xBF", # LATIN CAPITAL LETTER ENG --> LATIN SMALL LETTER ENG
269							"\xC0" => "\xE0", # LATIN CAPITAL LETTER A WITH MACRON --> LATIN SMALL LETTER A WITH MACRON
270							"\xC1" => "\xE1", # LATIN CAPITAL LETTER A WITH ACUTE --> LATIN SMALL LETTER A WITH ACUTE
271							"\xC2" => "\xE2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX --> LATIN SMALL LETTER A WITH CIRCUMFLEX
272							"\xC3" => "\xE3", # LATIN CAPITAL LETTER A WITH TILDE --> LATIN SMALL LETTER A WITH TILDE
273							"\xC4" => "\xE4", # LATIN CAPITAL LETTER A WITH DIAERESIS --> LATIN SMALL LETTER A WITH DIAERESIS
274							"\xC5" => "\xE5", # LATIN CAPITAL LETTER A WITH RING ABOVE --> LATIN SMALL LETTER A WITH RING ABOVE
275							"\xC6" => "\xE6", # LATIN CAPITAL LETTER AE --> LATIN SMALL LETTER AE
276							"\xC7" => "\xE7", # LATIN CAPITAL LETTER I WITH OGONEK --> LATIN SMALL LETTER I WITH OGONEK
277							"\xC8" => "\xE8", # LATIN CAPITAL LETTER C WITH CARON --> LATIN SMALL LETTER C WITH CARON
278							"\xC9" => "\xE9", # LATIN CAPITAL LETTER E WITH ACUTE --> LATIN SMALL LETTER E WITH ACUTE
279							"\xCA" => "\xEA", # LATIN CAPITAL LETTER E WITH OGONEK --> LATIN SMALL LETTER E WITH OGONEK
280							"\xCB" => "\xEB", # LATIN CAPITAL LETTER E WITH DIAERESIS --> LATIN SMALL LETTER E WITH DIAERESIS
281							"\xCC" => "\xEC", # LATIN CAPITAL LETTER E WITH DOT ABOVE --> LATIN SMALL LETTER E WITH DOT ABOVE
282							"\xCD" => "\xED", # LATIN CAPITAL LETTER I WITH ACUTE --> LATIN SMALL LETTER I WITH ACUTE
283							"\xCE" => "\xEE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX --> LATIN SMALL LETTER I WITH CIRCUMFLEX
284							"\xCF" => "\xEF", # LATIN CAPITAL LETTER I WITH MACRON --> LATIN SMALL LETTER I WITH MACRON
285							"\xD0" => "\xF0", # LATIN CAPITAL LETTER D WITH STROKE --> LATIN SMALL LETTER D WITH STROKE
286							"\xD1" => "\xF1", # LATIN CAPITAL LETTER N WITH CEDILLA --> LATIN SMALL LETTER N WITH CEDILLA
287							"\xD2" => "\xF2", # LATIN CAPITAL LETTER O WITH MACRON --> LATIN SMALL LETTER O WITH MACRON
288							"\xD3" => "\xF3", # LATIN CAPITAL LETTER K WITH CEDILLA --> LATIN SMALL LETTER K WITH CEDILLA
289							"\xD4" => "\xF4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX --> LATIN SMALL LETTER O WITH CIRCUMFLEX
290							"\xD5" => "\xF5", # LATIN CAPITAL LETTER O WITH TILDE --> LATIN SMALL LETTER O WITH TILDE
291							"\xD6" => "\xF6", # LATIN CAPITAL LETTER O WITH DIAERESIS --> LATIN SMALL LETTER O WITH DIAERESIS
292							"\xD8" => "\xF8", # LATIN CAPITAL LETTER O WITH STROKE --> LATIN SMALL LETTER O WITH STROKE
293							"\xD9" => "\xF9", # LATIN CAPITAL LETTER U WITH OGONEK --> LATIN SMALL LETTER U WITH OGONEK
294							"\xDA" => "\xFA", # LATIN CAPITAL LETTER U WITH ACUTE --> LATIN SMALL LETTER U WITH ACUTE
295							"\xDB" => "\xFB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX --> LATIN SMALL LETTER U WITH CIRCUMFLEX
296							"\xDC" => "\xFC", # LATIN CAPITAL LETTER U WITH DIAERESIS --> LATIN SMALL LETTER U WITH DIAERESIS
297							"\xDD" => "\xFD", # LATIN CAPITAL LETTER U WITH TILDE --> LATIN SMALL LETTER U WITH TILDE
298							"\xDE" => "\xFE", # LATIN CAPITAL LETTER U WITH MACRON --> LATIN SMALL LETTER U WITH MACRON
299							"\xDF" => "\x73\x73", # LATIN SMALL LETTER SHARP S --> LATIN SMALL LETTER S, LATIN SMALL LETTER S
300							);
301							}
302
303							else {
304							croak "Don't know my package name '@{[__PACKAGE__]}'";
305							}
306
307							#
308							# @ARGV wildcard globbing
309							#
310							sub import {
311
312	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
313	0					0	my @argv = ();
314	0					0	for (@ARGV) {
315
316							# has space
317	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
318	0	0				0	if (my @glob = Char::Elatin4::glob(qq{"$_"})) {
319	0					0	push @argv, @glob;
320							}
321							else {
322	0					0	push @argv, $_;
323							}
324							}
325
326							# has wildcard metachar
327							elsif (/\A (?:$q_char)? [?] /oxms) {
328	0	0				0	if (my @glob = Char::Elatin4::glob($_)) {
329	0					0	push @argv, @glob;
330							}
331							else {
332	0					0	push @argv, $_;
333							}
334							}
335
336							# no wildcard globbing
337							else {
338	0					0	push @argv, $_;
339							}
340							}
341	0					0	@ARGV = @argv;
342							}
343							}
344
345							# P.230 Care with Prototypes
346							# in Chapter 6: Subroutines
347							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
348							#
349							# If you aren't careful, you can get yourself into trouble with prototypes.
350							# But if you are careful, you can do a lot of neat things with them. This is
351							# all very powerful, of course, and should only be used in moderation to make
352							# the world a better place.
353
354							# P.332 Care with Prototypes
355							# in Chapter 7: Subroutines
356							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
357							#
358							# If you aren't careful, you can get yourself into trouble with prototypes.
359							# But if you are careful, you can do a lot of neat things with them. This is
360							# all very powerful, of course, and should only be used in moderation to make
361							# the world a better place.
362
363							#
364							# Prototypes of subroutines
365							#
366	0			0		0	sub unimport {}
367							sub Char::Elatin4::split(;$$$);
368							sub Char::Elatin4::tr($$$$;$);
369							sub Char::Elatin4::chop(@);
370							sub Char::Elatin4::index($$;$);
371							sub Char::Elatin4::rindex($$;$);
372							sub Char::Elatin4::lcfirst(@);
373							sub Char::Elatin4::lcfirst_();
374							sub Char::Elatin4::lc(@);
375							sub Char::Elatin4::lc_();
376							sub Char::Elatin4::ucfirst(@);
377							sub Char::Elatin4::ucfirst_();
378							sub Char::Elatin4::uc(@);
379							sub Char::Elatin4::uc_();
380							sub Char::Elatin4::fc(@);
381							sub Char::Elatin4::fc_();
382							sub Char::Elatin4::ignorecase;
383							sub Char::Elatin4::classic_character_class;
384							sub Char::Elatin4::capture;
385							sub Char::Elatin4::chr(;$);
386							sub Char::Elatin4::chr_();
387							sub Char::Elatin4::glob($);
388							sub Char::Elatin4::glob_();
389
390							sub Char::Latin4::ord(;$);
391							sub Char::Latin4::ord_();
392							sub Char::Latin4::reverse(@);
393							sub Char::Latin4::getc(;*@);
394							sub Char::Latin4::length(;$);
395							sub Char::Latin4::substr($$;$$);
396							sub Char::Latin4::index($$;$);
397							sub Char::Latin4::rindex($$;$);
398
399							#
400							# Regexp work
401							#
402	177			177		17853	BEGIN { eval q{ use vars qw(
	177			177		1507
	177					529
	177					137423
403							$Char::Latin4::re_a
404							$Char::Latin4::re_t
405							$Char::Latin4::re_n
406							$Char::Latin4::re_r
407							) } }
408
409							#
410							# Character class
411							#
412	177			177		16644	BEGIN { eval q{ use vars qw(
	177			177		1240
	177					780
	177					4184956
413							$dot
414							$dot_s
415							$eD
416							$eS
417							$eW
418							$eH
419							$eV
420							$eR
421							$eN
422							$not_alnum
423							$not_alpha
424							$not_ascii
425							$not_blank
426							$not_cntrl
427							$not_digit
428							$not_graph
429							$not_lower
430							$not_lower_i
431							$not_print
432							$not_punct
433							$not_space
434							$not_upper
435							$not_upper_i
436							$not_word
437							$not_xdigit
438							$eb
439							$eB
440							) } }
441
442							${Char::Elatin4::dot} = qr{(?:[^\x0A])};
443							${Char::Elatin4::dot_s} = qr{(?:[\x00-\xFF])};
444							${Char::Elatin4::eD} = qr{(?:[^0-9])};
445
446							# Vertical tabs are now whitespace
447							# \s in a regex now matches a vertical tab in all circumstances.
448							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
449							# ${Char::Elatin4::eS} = qr{(?:[^\x09\x0A \x0C\x0D\x20])};
450							# ${Char::Elatin4::eS} = qr{(?:[^\x09\x0A\x0B\x0C\x0D\x20])};
451							${Char::Elatin4::eS} = qr{(?:[^\s])};
452
453							${Char::Elatin4::eW} = qr{(?:[^0-9A-Z_a-z])};
454							${Char::Elatin4::eH} = qr{(?:[^\x09\x20])};
455							${Char::Elatin4::eV} = qr{(?:[^\x0A\x0B\x0C\x0D])};
456							${Char::Elatin4::eR} = qr{(?:\x0D\x0A\|[\x0A\x0D])};
457							${Char::Elatin4::eN} = qr{(?:[^\x0A])};
458							${Char::Elatin4::not_alnum} = qr{(?:[^\x30-\x39\x41-\x5A\x61-\x7A])};
459							${Char::Elatin4::not_alpha} = qr{(?:[^\x41-\x5A\x61-\x7A])};
460							${Char::Elatin4::not_ascii} = qr{(?:[^\x00-\x7F])};
461							${Char::Elatin4::not_blank} = qr{(?:[^\x09\x20])};
462							${Char::Elatin4::not_cntrl} = qr{(?:[^\x00-\x1F\x7F])};
463							${Char::Elatin4::not_digit} = qr{(?:[^\x30-\x39])};
464							${Char::Elatin4::not_graph} = qr{(?:[^\x21-\x7F])};
465							${Char::Elatin4::not_lower} = qr{(?:[^\x61-\x7A])};
466							${Char::Elatin4::not_lower_i} = qr{(?:[\x00-\xFF])};
467							${Char::Elatin4::not_print} = qr{(?:[^\x20-\x7F])};
468							${Char::Elatin4::not_punct} = qr{(?:[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
469							${Char::Elatin4::not_space} = qr{(?:[^\s\x0B])};
470							${Char::Elatin4::not_upper} = qr{(?:[^\x41-\x5A])};
471							${Char::Elatin4::not_upper_i} = qr{(?:[\x00-\xFF])};
472							${Char::Elatin4::not_word} = qr{(?:[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
473							${Char::Elatin4::not_xdigit} = qr{(?:[^\x30-\x39\x41-\x46\x61-\x66])};
474							${Char::Elatin4::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
475							${Char::Elatin4::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
476
477							# avoid: Name "Char::Elatin4::foo" used only once: possible typo at here.
478							${Char::Elatin4::dot} = ${Char::Elatin4::dot};
479							${Char::Elatin4::dot_s} = ${Char::Elatin4::dot_s};
480							${Char::Elatin4::eD} = ${Char::Elatin4::eD};
481							${Char::Elatin4::eS} = ${Char::Elatin4::eS};
482							${Char::Elatin4::eW} = ${Char::Elatin4::eW};
483							${Char::Elatin4::eH} = ${Char::Elatin4::eH};
484							${Char::Elatin4::eV} = ${Char::Elatin4::eV};
485							${Char::Elatin4::eR} = ${Char::Elatin4::eR};
486							${Char::Elatin4::eN} = ${Char::Elatin4::eN};
487							${Char::Elatin4::not_alnum} = ${Char::Elatin4::not_alnum};
488							${Char::Elatin4::not_alpha} = ${Char::Elatin4::not_alpha};
489							${Char::Elatin4::not_ascii} = ${Char::Elatin4::not_ascii};
490							${Char::Elatin4::not_blank} = ${Char::Elatin4::not_blank};
491							${Char::Elatin4::not_cntrl} = ${Char::Elatin4::not_cntrl};
492							${Char::Elatin4::not_digit} = ${Char::Elatin4::not_digit};
493							${Char::Elatin4::not_graph} = ${Char::Elatin4::not_graph};
494							${Char::Elatin4::not_lower} = ${Char::Elatin4::not_lower};
495							${Char::Elatin4::not_lower_i} = ${Char::Elatin4::not_lower_i};
496							${Char::Elatin4::not_print} = ${Char::Elatin4::not_print};
497							${Char::Elatin4::not_punct} = ${Char::Elatin4::not_punct};
498							${Char::Elatin4::not_space} = ${Char::Elatin4::not_space};
499							${Char::Elatin4::not_upper} = ${Char::Elatin4::not_upper};
500							${Char::Elatin4::not_upper_i} = ${Char::Elatin4::not_upper_i};
501							${Char::Elatin4::not_word} = ${Char::Elatin4::not_word};
502							${Char::Elatin4::not_xdigit} = ${Char::Elatin4::not_xdigit};
503							${Char::Elatin4::eb} = ${Char::Elatin4::eb};
504							${Char::Elatin4::eB} = ${Char::Elatin4::eB};
505
506							#
507							# Latin-4 split
508							#
509							sub Char::Elatin4::split(;$$$) {
510
511							# P.794 29.2.161. split
512							# in Chapter 29: Functions
513							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
514
515							# P.951 split
516							# in Chapter 27: Functions
517							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
518
519	0			0	0	0	my $pattern = $_[0];
520	0					0	my $string = $_[1];
521	0					0	my $limit = $_[2];
522
523							# if $pattern is also omitted or is the literal space, " "
524	0	0				0	if (not defined $pattern) {
525	0					0	$pattern = ' ';
526							}
527
528							# if $string is omitted, the function splits the $_ string
529	0	0				0	if (not defined $string) {
530	0	0				0	if (defined $_) {
531	0					0	$string = $_;
532							}
533							else {
534	0					0	$string = '';
535							}
536							}
537
538	0					0	my @split = ();
539
540							# when string is empty
541	0	0				0	if ($string eq '') {
		0
542
543							# resulting list value in list context
544	0	0				0	if (wantarray) {
545	0					0	return @split;
546							}
547
548							# count of substrings in scalar context
549							else {
550	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
551	0					0	@_ = @split;
552	0					0	return scalar @_;
553							}
554							}
555
556							# split's first argument is more consistently interpreted
557							#
558							# After some changes earlier in v5.17, split's behavior has been simplified:
559							# if the PATTERN argument evaluates to a string containing one space, it is
560							# treated the way that a literal string containing one space once was.
561							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
562
563							# if $pattern is also omitted or is the literal space, " ", the function splits
564							# on whitespace, /\s+/, after skipping any leading whitespace
565							# (and so on)
566
567							elsif ($pattern eq ' ') {
568	0	0				0	if (not defined $limit) {
569	0					0	return CORE::split(' ', $string);
570							}
571							else {
572	0					0	return CORE::split(' ', $string, $limit);
573							}
574							}
575
576							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
577	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
578
579							# a pattern capable of matching either the null string or something longer than the
580							# null string will split the value of $string into separate characters wherever it
581							# matches the null string between characters
582							# (and so on)
583
584	0	0				0	if ('' =~ / \A $pattern \z /xms) {
585	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
586	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
587
588							# P.1024 Appendix W.10 Multibyte Processing
589							# of ISBN 1-56592-224-7 CJKV Information Processing
590							# (and so on)
591
592							# the //m modifier is assumed when you split on the pattern /^/
593							# (and so on)
594
595							# V
596	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
597
598							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
599							# is included in the resulting list, interspersed with the fields that are ordinarily returned
600							# (and so on)
601
602	0					0	local $@;
603	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
604	0					0	push @split, eval('$' . $digit);
605							}
606							}
607							}
608
609							else {
610	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
611
612							# V
613	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
614	0					0	local $@;
615	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
616	0					0	push @split, eval('$' . $digit);
617							}
618							}
619							}
620							}
621
622							elsif ($limit > 0) {
623	0	0				0	if ('' =~ / \A $pattern \z /xms) {
624	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
625	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
626
627							# V
628	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
629	0					0	local $@;
630	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
631	0					0	push @split, eval('$' . $digit);
632							}
633							}
634							}
635							}
636							else {
637	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
638	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
639
640							# V
641	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
642	0					0	local $@;
643	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
644	0					0	push @split, eval('$' . $digit);
645							}
646							}
647							}
648							}
649							}
650
651	0	0				0	if (CORE::length($string) > 0) {
652	0					0	push @split, $string;
653							}
654
655							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
656	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
657	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
658	0					0	pop @split;
659							}
660							}
661
662							# resulting list value in list context
663	0	0				0	if (wantarray) {
664	0					0	return @split;
665							}
666
667							# count of substrings in scalar context
668							else {
669	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
670	0					0	@_ = @split;
671	0					0	return scalar @_;
672							}
673							}
674
675							#
676							# get last subexpression offsets
677							#
678							sub _last_subexpression_offsets {
679	0			0		0	my $pattern = $_[0];
680
681							# remove comment
682	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
683
684	0					0	my $modifier = '';
685	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
686	0					0	$modifier = $1;
687	0					0	$modifier =~ s/-[A-Za-z]*//;
688							}
689
690							# with /x modifier
691	0					0	my @char = ();
692	0	0				0	if ($modifier =~ /x/oxms) {
693	0					0	@char = $pattern =~ /\G(
694							\\ (?:$q_char) \|
695							\# (?:$q_char)*? $ \|
696							\[ (?: \\\] \| (?:$q_char))+? \] \|
697							\(\? \|
698							(?:$q_char)
699							)/oxmsg;
700							}
701
702							# without /x modifier
703							else {
704	0					0	@char = $pattern =~ /\G(
705							\\ (?:$q_char) \|
706							\[ (?: \\\] \| (?:$q_char))+? \] \|
707							\(\? \|
708							(?:$q_char)
709							)/oxmsg;
710							}
711
712	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
713							}
714
715							#
716							# Latin-4 transliteration (tr///)
717							#
718							sub Char::Elatin4::tr($$$$;$) {
719
720	0			0	0	0	my $bind_operator = $_[1];
721	0					0	my $searchlist = $_[2];
722	0					0	my $replacementlist = $_[3];
723	0		0			0	my $modifier = $_[4] \|\| '';
724
725	0	0				0	if ($modifier =~ /r/oxms) {
726	0	0				0	if ($bind_operator =~ / !~ /oxms) {
727	0					0	croak "Using !~ with tr///r doesn't make sense";
728							}
729							}
730
731	0					0	my @char = $_[0] =~ /\G ($q_char) /oxmsg;
732	0					0	my @searchlist = _charlist_tr($searchlist);
733	0					0	my @replacementlist = _charlist_tr($replacementlist);
734
735	0					0	my %tr = ();
736	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
737	0	0				0	if (not exists $tr{$searchlist[$i]}) {
738	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
739	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
740							}
741							elsif ($modifier =~ /d/oxms) {
742	0					0	$tr{$searchlist[$i]} = '';
743							}
744							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
745	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
746							}
747							else {
748	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
749							}
750							}
751							}
752
753	0					0	my $tr = 0;
754	0					0	my $replaced = '';
755	0	0				0	if ($modifier =~ /c/oxms) {
756	0					0	while (defined(my $char = shift @char)) {
757	0	0				0	if (not exists $tr{$char}) {
758	0	0				0	if (defined $replacementlist[0]) {
759	0					0	$replaced .= $replacementlist[0];
760							}
761	0					0	$tr++;
762	0	0				0	if ($modifier =~ /s/oxms) {
763	0		0			0	while (@char and (not exists $tr{$char[0]})) {
764	0					0	shift @char;
765	0					0	$tr++;
766							}
767							}
768							}
769							else {
770	0					0	$replaced .= $char;
771							}
772							}
773							}
774							else {
775	0					0	while (defined(my $char = shift @char)) {
776	0	0				0	if (exists $tr{$char}) {
777	0					0	$replaced .= $tr{$char};
778	0					0	$tr++;
779	0	0				0	if ($modifier =~ /s/oxms) {
780	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
781	0					0	shift @char;
782	0					0	$tr++;
783							}
784							}
785							}
786							else {
787	0					0	$replaced .= $char;
788							}
789							}
790							}
791
792	0	0				0	if ($modifier =~ /r/oxms) {
793	0					0	return $replaced;
794							}
795							else {
796	0					0	$_[0] = $replaced;
797	0	0				0	if ($bind_operator =~ / !~ /oxms) {
798	0					0	return not $tr;
799							}
800							else {
801	0					0	return $tr;
802							}
803							}
804							}
805
806							#
807							# Latin-4 chop
808							#
809							sub Char::Elatin4::chop(@) {
810
811	0			0	0	0	my $chop;
812	0	0				0	if (@_ == 0) {
813	0					0	my @char = /\G ($q_char) /oxmsg;
814	0					0	$chop = pop @char;
815	0					0	$_ = join '', @char;
816							}
817							else {
818	0					0	for (@_) {
819	0					0	my @char = /\G ($q_char) /oxmsg;
820	0					0	$chop = pop @char;
821	0					0	$_ = join '', @char;
822							}
823							}
824	0					0	return $chop;
825							}
826
827							#
828							# Latin-4 index by octet
829							#
830							sub Char::Elatin4::index($$;$) {
831
832	0			0	1	0	my($str,$substr,$position) = @_;
833	0		0			0	$position \|\|= 0;
834	0					0	my $pos = 0;
835
836	0					0	while ($pos < CORE::length($str)) {
837	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
838	0	0				0	if ($pos >= $position) {
839	0					0	return $pos;
840							}
841							}
842	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
843	0					0	$pos += CORE::length($1);
844							}
845							else {
846	0					0	$pos += 1;
847							}
848							}
849	0					0	return -1;
850							}
851
852							#
853							# Latin-4 reverse index
854							#
855							sub Char::Elatin4::rindex($$;$) {
856
857	0			0	0	0	my($str,$substr,$position) = @_;
858	0		0			0	$position \|\|= CORE::length($str) - 1;
859	0					0	my $pos = 0;
860	0					0	my $rindex = -1;
861
862	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
863	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
864	0					0	$rindex = $pos;
865							}
866	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
867	0					0	$pos += CORE::length($1);
868							}
869							else {
870	0					0	$pos += 1;
871							}
872							}
873	0					0	return $rindex;
874							}
875
876							#
877							# Latin-4 lower case first with parameter
878							#
879							sub Char::Elatin4::lcfirst(@) {
880	0	0		0	0	0	if (@_) {
881	0					0	my $s = shift @_;
882	0	0	0			0	if (@_ and wantarray) {
883	0					0	return Char::Elatin4::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
884							}
885							else {
886	0					0	return Char::Elatin4::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
887							}
888							}
889							else {
890	0					0	return Char::Elatin4::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
891							}
892							}
893
894							#
895							# Latin-4 lower case first without parameter
896							#
897							sub Char::Elatin4::lcfirst_() {
898	0			0	0	0	return Char::Elatin4::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
899							}
900
901							#
902							# Latin-4 lower case with parameter
903							#
904							sub Char::Elatin4::lc(@) {
905	0	0		0	0	0	if (@_) {
906	0					0	my $s = shift @_;
907	0	0	0			0	if (@_ and wantarray) {
908	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
909							}
910							else {
911	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
912							}
913							}
914							else {
915	0					0	return Char::Elatin4::lc_();
916							}
917							}
918
919							#
920							# Latin-4 lower case without parameter
921							#
922							sub Char::Elatin4::lc_() {
923	0			0	0	0	my $s = $_;
924	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
925							}
926
927							#
928							# Latin-4 upper case first with parameter
929							#
930							sub Char::Elatin4::ucfirst(@) {
931	0	0		0	0	0	if (@_) {
932	0					0	my $s = shift @_;
933	0	0	0			0	if (@_ and wantarray) {
934	0					0	return Char::Elatin4::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
935							}
936							else {
937	0					0	return Char::Elatin4::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
938							}
939							}
940							else {
941	0					0	return Char::Elatin4::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
942							}
943							}
944
945							#
946							# Latin-4 upper case first without parameter
947							#
948							sub Char::Elatin4::ucfirst_() {
949	0			0	0	0	return Char::Elatin4::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
950							}
951
952							#
953							# Latin-4 upper case with parameter
954							#
955							sub Char::Elatin4::uc(@) {
956	0	0		0	0	0	if (@_) {
957	0					0	my $s = shift @_;
958	0	0	0			0	if (@_ and wantarray) {
959	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
960							}
961							else {
962	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
963							}
964							}
965							else {
966	0					0	return Char::Elatin4::uc_();
967							}
968							}
969
970							#
971							# Latin-4 upper case without parameter
972							#
973							sub Char::Elatin4::uc_() {
974	0			0	0	0	my $s = $_;
975	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
976							}
977
978							#
979							# Latin-4 fold case with parameter
980							#
981							sub Char::Elatin4::fc(@) {
982	0	0		0	0	0	if (@_) {
983	0					0	my $s = shift @_;
984	0	0	0			0	if (@_ and wantarray) {
985	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
986							}
987							else {
988	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
989							}
990							}
991							else {
992	0					0	return Char::Elatin4::fc_();
993							}
994							}
995
996							#
997							# Latin-4 fold case without parameter
998							#
999							sub Char::Elatin4::fc_() {
1000	0			0	0	0	my $s = $_;
1001	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
1002							}
1003
1004							#
1005							# Latin-4 regexp capture
1006							#
1007							{
1008							sub Char::Elatin4::capture {
1009	0			0	1	0	return $_[0];
1010							}
1011							}
1012
1013							#
1014							# Latin-4 regexp ignore case modifier
1015							#
1016							sub Char::Elatin4::ignorecase {
1017
1018	0			0	0	0	my @string = @_;
1019	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
1020
1021							# ignore case of $scalar or @array
1022	0					0	for my $string (@string) {
1023
1024							# split regexp
1025	0					0	my @char = $string =~ /\G(
1026							\[\^ \|
1027							\\? (?:$q_char)
1028							)/oxmsg;
1029
1030							# unescape character
1031	0					0	for (my $i=0; $i <= $#char; $i++) {
1032	0	0				0	next if not defined $char[$i];
1033
1034							# open character class [...]
1035	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
1036	0					0	my $left = $i;
1037
1038							# [] make die "unmatched [] in regexp ..."
1039
1040	0	0				0	if ($char[$i+1] eq ']') {
1041	0					0	$i++;
1042							}
1043
1044	0					0	while (1) {
1045	0	0				0	if (++$i > $#char) {
1046	0					0	croak "Unmatched [] in regexp";
1047							}
1048	0	0				0	if ($char[$i] eq ']') {
1049	0					0	my $right = $i;
1050	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
1051
1052							# escape character
1053	0					0	for my $char (@charlist) {
1054	0	0				0	if (0) {
1055							}
1056
1057	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1058	0					0	$char = $1 . '\\' . $char;
1059							}
1060							}
1061
1062							# [...]
1063	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
1064
1065	0					0	$i = $left;
1066	0					0	last;
1067							}
1068							}
1069							}
1070
1071							# open character class [^...]
1072							elsif ($char[$i] eq '[^') {
1073	0					0	my $left = $i;
1074
1075							# [^] make die "unmatched [] in regexp ..."
1076
1077	0	0				0	if ($char[$i+1] eq ']') {
1078	0					0	$i++;
1079							}
1080
1081	0					0	while (1) {
1082	0	0				0	if (++$i > $#char) {
1083	0					0	croak "Unmatched [] in regexp";
1084							}
1085	0	0				0	if ($char[$i] eq ']') {
1086	0					0	my $right = $i;
1087	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
1088
1089							# escape character
1090	0					0	for my $char (@charlist) {
1091	0	0				0	if (0) {
1092							}
1093
1094	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1095	0					0	$char = '\\' . $char;
1096							}
1097							}
1098
1099							# [^...]
1100	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
1101
1102	0					0	$i = $left;
1103	0					0	last;
1104							}
1105							}
1106							}
1107
1108							# rewrite classic character class or escape character
1109							elsif (my $char = classic_character_class($char[$i])) {
1110	0					0	$char[$i] = $char;
1111							}
1112
1113							# with /i modifier
1114							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1115	0					0	my $uc = Char::Elatin4::uc($char[$i]);
1116	0					0	my $fc = Char::Elatin4::fc($char[$i]);
1117	0	0				0	if ($uc ne $fc) {
1118	0	0				0	if (CORE::length($fc) == 1) {
1119	0					0	$char[$i] = '[' . $uc . $fc . ']';
1120							}
1121							else {
1122	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
1123							}
1124							}
1125							}
1126							}
1127
1128							# characterize
1129	0					0	for (my $i=0; $i <= $#char; $i++) {
1130	0	0				0	next if not defined $char[$i];
1131
1132	0	0				0	if (0) {
1133							}
1134
1135							# quote character before ? + * {
1136	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
1137	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
1138	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
1139							}
1140							}
1141							}
1142
1143	0					0	$string = join '', @char;
1144							}
1145
1146							# make regexp string
1147	0					0	return @string;
1148							}
1149
1150							#
1151							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
1152							#
1153							sub Char::Elatin4::classic_character_class {
1154	0			0	0	0	my($char) = @_;
1155
1156							return {
1157	0		0			0	'\D' => '${Char::Elatin4::eD}',
1158							'\S' => '${Char::Elatin4::eS}',
1159							'\W' => '${Char::Elatin4::eW}',
1160							'\d' => '[0-9]',
1161
1162							# Before Perl 5.6, \s only matched the five whitespace characters
1163							# tab, newline, form-feed, carriage return, and the space character
1164							# itself, which, taken together, is the character class [\t\n\f\r ].
1165
1166							# Vertical tabs are now whitespace
1167							# \s in a regex now matches a vertical tab in all circumstances.
1168							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1169							# \t \n \v \f \r space
1170							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1171							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1172							'\s' => '\s',
1173
1174							'\w' => '[0-9A-Z_a-z]',
1175							'\C' => '[\x00-\xFF]',
1176							'\X' => 'X',
1177
1178							# \h \v \H \V
1179
1180							# P.114 Character Class Shortcuts
1181							# in Chapter 7: In the World of Regular Expressions
1182							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
1183
1184							# P.357 13.2.3 Whitespace
1185							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
1186							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1187							#
1188							# 0x00009 CHARACTER TABULATION h s
1189							# 0x0000a LINE FEED (LF) vs
1190							# 0x0000b LINE TABULATION v
1191							# 0x0000c FORM FEED (FF) vs
1192							# 0x0000d CARRIAGE RETURN (CR) vs
1193							# 0x00020 SPACE h s
1194
1195							# P.196 Table 5-9. Alphanumeric regex metasymbols
1196							# in Chapter 5. Pattern Matching
1197							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1198
1199							# (and so on)
1200
1201							'\H' => '${Char::Elatin4::eH}',
1202							'\V' => '${Char::Elatin4::eV}',
1203							'\h' => '[\x09\x20]',
1204							'\v' => '[\x0A\x0B\x0C\x0D]',
1205							'\R' => '${Char::Elatin4::eR}',
1206
1207							# \N
1208							#
1209							# http://perldoc.perl.org/perlre.html
1210							# Character Classes and other Special Escapes
1211							# Any character but \n (experimental). Not affected by /s modifier
1212
1213							'\N' => '${Char::Elatin4::eN}',
1214
1215							# \b \B
1216
1217							# P.180 Boundaries: The \b and \B Assertions
1218							# in Chapter 5: Pattern Matching
1219							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1220
1221							# P.219 Boundaries: The \b and \B Assertions
1222							# in Chapter 5: Pattern Matching
1223							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1224
1225							# '\b' => '(?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))',
1226							'\b' => '${Char::Elatin4::eb}',
1227
1228							# '\B' => '(?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))',
1229							'\B' => '${Char::Elatin4::eB}',
1230
1231							}->{$char} \|\| '';
1232							}
1233
1234							#
1235							# prepare Latin-4 characters per length
1236							#
1237
1238							# 1 octet characters
1239							my @chars1 = ();
1240							sub chars1 {
1241	0	0		0	0	0	if (@chars1) {
1242	0					0	return @chars1;
1243							}
1244	0	0				0	if (exists $range_tr{1}) {
1245	0					0	my @ranges = @{ $range_tr{1} };
	0					0
1246	0					0	while (my @range = splice(@ranges,0,1)) {
1247	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1248	0					0	push @chars1, pack 'C', $oct0;
1249							}
1250							}
1251							}
1252	0					0	return @chars1;
1253							}
1254
1255							# 2 octets characters
1256							my @chars2 = ();
1257							sub chars2 {
1258	0	0		0	0	0	if (@chars2) {
1259	0					0	return @chars2;
1260							}
1261	0	0				0	if (exists $range_tr{2}) {
1262	0					0	my @ranges = @{ $range_tr{2} };
	0					0
1263	0					0	while (my @range = splice(@ranges,0,2)) {
1264	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1265	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1266	0					0	push @chars2, pack 'CC', $oct0,$oct1;
1267							}
1268							}
1269							}
1270							}
1271	0					0	return @chars2;
1272							}
1273
1274							# 3 octets characters
1275							my @chars3 = ();
1276							sub chars3 {
1277	0	0		0	0	0	if (@chars3) {
1278	0					0	return @chars3;
1279							}
1280	0	0				0	if (exists $range_tr{3}) {
1281	0					0	my @ranges = @{ $range_tr{3} };
	0					0
1282	0					0	while (my @range = splice(@ranges,0,3)) {
1283	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1284	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1285	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1286	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
1287							}
1288							}
1289							}
1290							}
1291							}
1292	0					0	return @chars3;
1293							}
1294
1295							# 4 octets characters
1296							my @chars4 = ();
1297							sub chars4 {
1298	0	0		0	0	0	if (@chars4) {
1299	0					0	return @chars4;
1300							}
1301	0	0				0	if (exists $range_tr{4}) {
1302	0					0	my @ranges = @{ $range_tr{4} };
	0					0
1303	0					0	while (my @range = splice(@ranges,0,4)) {
1304	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1305	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1306	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1307	0					0	for my $oct3 (@{$range[3]}) {
	0					0
1308	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
1309							}
1310							}
1311							}
1312							}
1313							}
1314							}
1315	0					0	return @chars4;
1316							}
1317
1318							#
1319							# Latin-4 open character list for tr
1320							#
1321							sub _charlist_tr {
1322
1323	0			0		0	local $_ = shift @_;
1324
1325							# unescape character
1326	0					0	my @char = ();
1327	0					0	while (not /\G \z/oxmsgc) {
1328	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
1329	0					0	push @char, '\-';
1330							}
1331							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
1332	0					0	push @char, CORE::chr(oct $1);
1333							}
1334							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
1335	0					0	push @char, CORE::chr(hex $1);
1336							}
1337							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
1338	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
1339							}
1340							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
1341	0					0	push @char, {
1342							'\0' => "\0",
1343							'\n' => "\n",
1344							'\r' => "\r",
1345							'\t' => "\t",
1346							'\f' => "\f",
1347							'\b' => "\x08", # \b means backspace in character class
1348							'\a' => "\a",
1349							'\e' => "\e",
1350							}->{$1};
1351							}
1352							elsif (/\G \\ ($q_char) /oxmsgc) {
1353	0					0	push @char, $1;
1354							}
1355							elsif (/\G ($q_char) /oxmsgc) {
1356	0					0	push @char, $1;
1357							}
1358							}
1359
1360							# join separated multiple-octet
1361	0					0	@char = join('',@char) =~ /\G (\\-\|$q_char) /oxmsg;
1362
1363							# unescape '-'
1364	0					0	my @i = ();
1365	0					0	for my $i (0 .. $#char) {
1366	0	0				0	if ($char[$i] eq '\-') {
		0
1367	0					0	$char[$i] = '-';
1368							}
1369							elsif ($char[$i] eq '-') {
1370	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1371	0					0	push @i, $i;
1372							}
1373							}
1374							}
1375
1376							# open character list (reverse for splice)
1377	0					0	for my $i (CORE::reverse @i) {
1378	0					0	my @range = ();
1379
1380							# range error
1381	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
1382	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1383							}
1384
1385							# range of multiple-octet code
1386	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
1387	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
1388	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
1389							}
1390							elsif (CORE::length($char[$i+1]) == 2) {
1391	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1392	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
1393							}
1394							elsif (CORE::length($char[$i+1]) == 3) {
1395	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1396	0					0	push @range, chars2();
1397	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1398							}
1399							elsif (CORE::length($char[$i+1]) == 4) {
1400	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1401	0					0	push @range, chars2();
1402	0					0	push @range, chars3();
1403	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1404							}
1405							else {
1406	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1407							}
1408							}
1409							elsif (CORE::length($char[$i-1]) == 2) {
1410	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
1411	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
1412							}
1413							elsif (CORE::length($char[$i+1]) == 3) {
1414	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1415	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1416							}
1417							elsif (CORE::length($char[$i+1]) == 4) {
1418	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1419	0					0	push @range, chars3();
1420	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1421							}
1422							else {
1423	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1424							}
1425							}
1426							elsif (CORE::length($char[$i-1]) == 3) {
1427	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
1428	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
1429							}
1430							elsif (CORE::length($char[$i+1]) == 4) {
1431	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
1432	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1433							}
1434							else {
1435	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1436							}
1437							}
1438							elsif (CORE::length($char[$i-1]) == 4) {
1439	0	0				0	if (CORE::length($char[$i+1]) == 4) {
1440	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
1441							}
1442							else {
1443	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1444							}
1445							}
1446							else {
1447	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1448							}
1449
1450	0					0	splice @char, $i-1, 3, @range;
1451							}
1452
1453	0					0	return @char;
1454							}
1455
1456							#
1457							# Latin-4 open character class
1458							#
1459							sub _cc {
1460	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
1461	0					0	die __FILE__, ": subroutine cc got no parameter.";
1462							}
1463							elsif (scalar(@_) == 1) {
1464	0					0	return sprintf('\x%02X',$_[0]);
1465							}
1466							elsif (scalar(@_) == 2) {
1467	0	0				0	if ($_[0] > $_[1]) {
		0
		0
1468	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).";
1469							}
1470							elsif ($_[0] == $_[1]) {
1471	0					0	return sprintf('\x%02X',$_[0]);
1472							}
1473							elsif (($_[0]+1) == $_[1]) {
1474	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
1475							}
1476							else {
1477	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
1478							}
1479							}
1480							else {
1481	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).";
	0					0
1482							}
1483							}
1484
1485							#
1486							# Latin-4 octet range
1487							#
1488							sub _octets {
1489	0			0		0	my $length = shift @_;
1490
1491	0	0				0	if ($length == 1) {
1492	0					0	my($a1) = unpack 'C', $_[0];
1493	0					0	my($z1) = unpack 'C', $_[1];
1494
1495	0	0				0	if ($a1 > $z1) {
1496	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
1497							}
1498
1499	0	0				0	if ($a1 == $z1) {
		0
1500	0					0	return sprintf('\x%02X',$a1);
1501							}
1502							elsif (($a1+1) == $z1) {
1503	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
1504							}
1505							else {
1506	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
1507							}
1508							}
1509							else {
1510	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).";
1511							}
1512							}
1513
1514							#
1515							# Latin-4 range regexp
1516							#
1517							sub _range_regexp {
1518	0			0		0	my($length,$first,$last) = @_;
1519
1520	0					0	my @range_regexp = ();
1521	0	0				0	if (not exists $range_tr{$length}) {
1522	0					0	return @range_regexp;
1523							}
1524
1525	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
1526	0					0	while (my @range = splice(@ranges,0,$length)) {
1527	0					0	my $min = '';
1528	0					0	my $max = '';
1529	0					0	for (my $i=0; $i < $length; $i++) {
1530	0					0	$min .= pack 'C', $range[$i][0];
1531	0					0	$max .= pack 'C', $range[$i][-1];
1532							}
1533
1534							# min___max
1535							# FIRST_____________LAST
1536							# (nothing)
1537
1538	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
1539							}
1540
1541							# **********
1542							# min_________max
1543							# FIRST_____________LAST
1544							# **********
1545
1546							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
1547	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
1548							}
1549
1550							# **********************
1551							# min________________max
1552							# FIRST_____________LAST
1553							# **********************
1554
1555							elsif (($min eq $first) and ($max eq $last)) {
1556	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1557							}
1558
1559							# *********
1560							# min___max
1561							# FIRST_____________LAST
1562							# *********
1563
1564							elsif (($first le $min) and ($max le $last)) {
1565	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
1566							}
1567
1568							# **********************
1569							# min__________________________max
1570							# FIRST_____________LAST
1571							# **********************
1572
1573							elsif (($min le $first) and ($last le $max)) {
1574	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1575							}
1576
1577							# *********
1578							# min________max
1579							# FIRST_____________LAST
1580							# *********
1581
1582							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
1583	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
1584							}
1585
1586							# min___max
1587							# FIRST_____________LAST
1588							# (nothing)
1589
1590							elsif ($last lt $min) {
1591							}
1592
1593							else {
1594	0					0	die __FILE__, ": subroutine _range_regexp panic.";
1595							}
1596							}
1597
1598	0					0	return @range_regexp;
1599							}
1600
1601							#
1602							# Latin-4 open character list for qr and not qr
1603							#
1604							sub _charlist {
1605
1606	0			0		0	my $modifier = pop @_;
1607	0					0	my @char = @_;
1608
1609	0	0				0	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
1610
1611							# unescape character
1612	0					0	for (my $i=0; $i <= $#char; $i++) {
1613
1614							# escape - to ...
1615	0	0	0			0	if ($char[$i] eq '-') {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
1616	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1617	0					0	$char[$i] = '...';
1618							}
1619							}
1620
1621							# octal escape sequence
1622							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
1623	0					0	$char[$i] = octchr($1);
1624							}
1625
1626							# hexadecimal escape sequence
1627							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
1628	0					0	$char[$i] = hexchr($1);
1629							}
1630
1631							# \N{CHARNAME} --> N\{CHARNAME}
1632							elsif ($char[$i] =~ /\A \\ (N) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1633	0					0	$char[$i] = $1 . '\\' . $2;
1634							}
1635
1636							# \p{PROPERTY} --> p\{PROPERTY}
1637							elsif ($char[$i] =~ /\A \\ (p) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1638	0					0	$char[$i] = $1 . '\\' . $2;
1639							}
1640
1641							# \P{PROPERTY} --> P\{PROPERTY}
1642							elsif ($char[$i] =~ /\A \\ (P) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1643	0					0	$char[$i] = $1 . '\\' . $2;
1644							}
1645
1646							# \p, \P, \X --> p, P, X
1647							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
1648	0					0	$char[$i] = $1;
1649							}
1650
1651							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
1652	0					0	$char[$i] = CORE::chr oct $1;
1653							}
1654							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
1655	0					0	$char[$i] = CORE::chr hex $1;
1656							}
1657							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
1658	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
1659							}
1660							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
1661	0					0	$char[$i] = {
1662							'\0' => "\0",
1663							'\n' => "\n",
1664							'\r' => "\r",
1665							'\t' => "\t",
1666							'\f' => "\f",
1667							'\b' => "\x08", # \b means backspace in character class
1668							'\a' => "\a",
1669							'\e' => "\e",
1670							'\d' => '[0-9]',
1671
1672							# Vertical tabs are now whitespace
1673							# \s in a regex now matches a vertical tab in all circumstances.
1674							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1675							# \t \n \v \f \r space
1676							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1677							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1678							'\s' => '\s',
1679
1680							'\w' => '[0-9A-Z_a-z]',
1681							'\D' => '${Char::Elatin4::eD}',
1682							'\S' => '${Char::Elatin4::eS}',
1683							'\W' => '${Char::Elatin4::eW}',
1684
1685							'\H' => '${Char::Elatin4::eH}',
1686							'\V' => '${Char::Elatin4::eV}',
1687							'\h' => '[\x09\x20]',
1688							'\v' => '[\x0A\x0B\x0C\x0D]',
1689							'\R' => '${Char::Elatin4::eR}',
1690
1691							}->{$1};
1692							}
1693
1694							# POSIX-style character classes
1695							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
1696	0					0	$char[$i] = {
1697
1698							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
1699							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
1700							'[:^lower:]' => '${Char::Elatin4::not_lower_i}',
1701							'[:^upper:]' => '${Char::Elatin4::not_upper_i}',
1702
1703							}->{$1};
1704							}
1705							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
1706	0					0	$char[$i] = {
1707
1708							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
1709							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
1710							'[:ascii:]' => '[\x00-\x7F]',
1711							'[:blank:]' => '[\x09\x20]',
1712							'[:cntrl:]' => '[\x00-\x1F\x7F]',
1713							'[:digit:]' => '[\x30-\x39]',
1714							'[:graph:]' => '[\x21-\x7F]',
1715							'[:lower:]' => '[\x61-\x7A]',
1716							'[:print:]' => '[\x20-\x7F]',
1717							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
1718
1719							# P.174 POSIX-Style Character Classes
1720							# in Chapter 5: Pattern Matching
1721							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1722
1723							# P.311 11.2.4 Character Classes and other Special Escapes
1724							# in Chapter 11: perlre: Perl regular expressions
1725							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1726
1727							# P.210 POSIX-Style Character Classes
1728							# in Chapter 5: Pattern Matching
1729							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1730
1731							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
1732
1733							'[:upper:]' => '[\x41-\x5A]',
1734							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
1735							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
1736							'[:^alnum:]' => '${Char::Elatin4::not_alnum}',
1737							'[:^alpha:]' => '${Char::Elatin4::not_alpha}',
1738							'[:^ascii:]' => '${Char::Elatin4::not_ascii}',
1739							'[:^blank:]' => '${Char::Elatin4::not_blank}',
1740							'[:^cntrl:]' => '${Char::Elatin4::not_cntrl}',
1741							'[:^digit:]' => '${Char::Elatin4::not_digit}',
1742							'[:^graph:]' => '${Char::Elatin4::not_graph}',
1743							'[:^lower:]' => '${Char::Elatin4::not_lower}',
1744							'[:^print:]' => '${Char::Elatin4::not_print}',
1745							'[:^punct:]' => '${Char::Elatin4::not_punct}',
1746							'[:^space:]' => '${Char::Elatin4::not_space}',
1747							'[:^upper:]' => '${Char::Elatin4::not_upper}',
1748							'[:^word:]' => '${Char::Elatin4::not_word}',
1749							'[:^xdigit:]' => '${Char::Elatin4::not_xdigit}',
1750
1751							}->{$1};
1752							}
1753							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
1754	0					0	$char[$i] = $1;
1755							}
1756							}
1757
1758							# open character list
1759	0					0	my @singleoctet = ();
1760	0					0	my @multipleoctet = ();
1761	0					0	for (my $i=0; $i <= $#char; ) {
1762
1763							# escaped -
1764	0	0	0			0	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		0
		0
		0
		0
		0
1765	0					0	$i += 1;
1766	0					0	next;
1767							}
1768
1769							# make range regexp
1770							elsif ($char[$i] eq '...') {
1771
1772							# range error
1773	0	0				0	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		0
1774	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1775							}
1776							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
1777	0	0				0	if ($char[$i-1] gt $char[$i+1]) {
1778	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1779							}
1780							}
1781
1782							# make range regexp per length
1783	0					0	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
1784	0					0	my @regexp = ();
1785
1786							# is first and last
1787	0	0	0			0	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		0	0
		0
		0
1788	0					0	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
1789							}
1790
1791							# is first
1792							elsif ($length == CORE::length($char[$i-1])) {
1793	0					0	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
1794							}
1795
1796							# is inside in first and last
1797							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
1798	0					0	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
1799							}
1800
1801							# is last
1802							elsif ($length == CORE::length($char[$i+1])) {
1803	0					0	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
1804							}
1805
1806							else {
1807	0					0	die __FILE__, ": subroutine make_regexp panic.";
1808							}
1809
1810	0	0				0	if ($length == 1) {
1811	0					0	push @singleoctet, @regexp;
1812							}
1813							else {
1814	0					0	push @multipleoctet, @regexp;
1815							}
1816							}
1817
1818	0					0	$i += 2;
1819							}
1820
1821							# with /i modifier
1822							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1823	0	0				0	if ($modifier =~ /i/oxms) {
1824	0					0	my $uc = Char::Elatin4::uc($char[$i]);
1825	0					0	my $fc = Char::Elatin4::fc($char[$i]);
1826	0	0				0	if ($uc ne $fc) {
1827	0	0				0	if (CORE::length($fc) == 1) {
1828	0					0	push @singleoctet, $uc, $fc;
1829							}
1830							else {
1831	0					0	push @singleoctet, $uc;
1832	0					0	push @multipleoctet, $fc;
1833							}
1834							}
1835							else {
1836	0					0	push @singleoctet, $char[$i];
1837							}
1838							}
1839							else {
1840	0					0	push @singleoctet, $char[$i];
1841							}
1842	0					0	$i += 1;
1843							}
1844
1845							# single character of single octet code
1846							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
1847	0					0	push @singleoctet, "\t", "\x20";
1848	0					0	$i += 1;
1849							}
1850							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
1851	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
1852	0					0	$i += 1;
1853							}
1854							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
1855	0					0	push @singleoctet, $char[$i];
1856	0					0	$i += 1;
1857							}
1858
1859							# single character of multiple-octet code
1860							else {
1861	0					0	push @multipleoctet, $char[$i];
1862	0					0	$i += 1;
1863							}
1864							}
1865
1866							# quote metachar
1867	0					0	for (@singleoctet) {
1868	0	0				0	if ($_ eq '...') {
		0
		0
		0
		0
1869	0					0	$_ = '-';
1870							}
1871							elsif (/\A \n \z/oxms) {
1872	0					0	$_ = '\n';
1873							}
1874							elsif (/\A \r \z/oxms) {
1875	0					0	$_ = '\r';
1876							}
1877							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
1878	0					0	$_ = sprintf('\x%02X', CORE::ord $1);
1879							}
1880							elsif (/\A [\x00-\xFF] \z/oxms) {
1881	0					0	$_ = quotemeta $_;
1882							}
1883							}
1884
1885							# return character list
1886	0					0	return \@singleoctet, \@multipleoctet;
1887							}
1888
1889							#
1890							# Latin-4 octal escape sequence
1891							#
1892							sub octchr {
1893	0			0	0	0	my($octdigit) = @_;
1894
1895	0					0	my @binary = ();
1896	0					0	for my $octal (split(//,$octdigit)) {
1897	0					0	push @binary, {
1898							'0' => '000',
1899							'1' => '001',
1900							'2' => '010',
1901							'3' => '011',
1902							'4' => '100',
1903							'5' => '101',
1904							'6' => '110',
1905							'7' => '111',
1906							}->{$octal};
1907							}
1908	0					0	my $binary = join '', @binary;
1909
1910	0					0	my $octchr = {
1911							# 1234567
1912							1 => pack('B*', "0000000$binary"),
1913							2 => pack('B*', "000000$binary"),
1914							3 => pack('B*', "00000$binary"),
1915							4 => pack('B*', "0000$binary"),
1916							5 => pack('B*', "000$binary"),
1917							6 => pack('B*', "00$binary"),
1918							7 => pack('B*', "0$binary"),
1919							0 => pack('B*', "$binary"),
1920
1921							}->{CORE::length($binary) % 8};
1922
1923	0					0	return $octchr;
1924							}
1925
1926							#
1927							# Latin-4 hexadecimal escape sequence
1928							#
1929							sub hexchr {
1930	0			0	0	0	my($hexdigit) = @_;
1931
1932	0					0	my $hexchr = {
1933							1 => pack('H*', "0$hexdigit"),
1934							0 => pack('H*', "$hexdigit"),
1935
1936							}->{CORE::length($_[0]) % 2};
1937
1938	0					0	return $hexchr;
1939							}
1940
1941							#
1942							# Latin-4 open character list for qr
1943							#
1944							sub charlist_qr {
1945
1946	0			0	0	0	my $modifier = pop @_;
1947	0					0	my @char = @_;
1948
1949	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1950	0					0	my @singleoctet = @$singleoctet;
1951	0					0	my @multipleoctet = @$multipleoctet;
1952
1953							# return character list
1954	0	0				0	if (scalar(@singleoctet) >= 1) {
1955
1956							# with /i modifier
1957	0	0				0	if ($modifier =~ m/i/oxms) {
1958	0					0	my %singleoctet_ignorecase = ();
1959	0					0	for (@singleoctet) {
1960	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
1961	0					0	for my $ord (hex($1) .. hex($2)) {
1962	0					0	my $char = CORE::chr($ord);
1963	0					0	my $uc = Char::Elatin4::uc($char);
1964	0					0	my $fc = Char::Elatin4::fc($char);
1965	0	0				0	if ($uc eq $fc) {
1966	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
1967							}
1968							else {
1969	0	0				0	if (CORE::length($fc) == 1) {
1970	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1971	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
1972							}
1973							else {
1974	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1975	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
1976							}
1977							}
1978							}
1979							}
1980	0	0				0	if ($_ ne '') {
1981	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
1982							}
1983							}
1984	0					0	my $i = 0;
1985	0					0	my @singleoctet_ignorecase = ();
1986	0					0	for my $ord (0 .. 255) {
1987	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
1988	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
1989							}
1990							else {
1991	0					0	$i++;
1992							}
1993							}
1994	0					0	@singleoctet = ();
1995	0					0	for my $range (@singleoctet_ignorecase) {
1996	0	0				0	if (ref $range) {
1997	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
1998	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
1999							}
2000							elsif (scalar(@{$range}) == 2) {
2001	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2002							}
2003							else {
2004	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2005							}
2006							}
2007							}
2008							}
2009
2010	0					0	my $not_anchor = '';
2011
2012	0					0	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
2013							}
2014	0	0				0	if (scalar(@multipleoctet) >= 2) {
2015	0					0	return '(?:' . join('\|', @multipleoctet) . ')';
2016							}
2017							else {
2018	0					0	return $multipleoctet[0];
2019							}
2020							}
2021
2022							#
2023							# Latin-4 open character list for not qr
2024							#
2025							sub charlist_not_qr {
2026
2027	0			0	0	0	my $modifier = pop @_;
2028	0					0	my @char = @_;
2029
2030	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
2031	0					0	my @singleoctet = @$singleoctet;
2032	0					0	my @multipleoctet = @$multipleoctet;
2033
2034							# with /i modifier
2035	0	0				0	if ($modifier =~ m/i/oxms) {
2036	0					0	my %singleoctet_ignorecase = ();
2037	0					0	for (@singleoctet) {
2038	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
2039	0					0	for my $ord (hex($1) .. hex($2)) {
2040	0					0	my $char = CORE::chr($ord);
2041	0					0	my $uc = Char::Elatin4::uc($char);
2042	0					0	my $fc = Char::Elatin4::fc($char);
2043	0	0				0	if ($uc eq $fc) {
2044	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
2045							}
2046							else {
2047	0	0				0	if (CORE::length($fc) == 1) {
2048	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2049	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
2050							}
2051							else {
2052	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2053	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
2054							}
2055							}
2056							}
2057							}
2058	0	0				0	if ($_ ne '') {
2059	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
2060							}
2061							}
2062	0					0	my $i = 0;
2063	0					0	my @singleoctet_ignorecase = ();
2064	0					0	for my $ord (0 .. 255) {
2065	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
2066	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
2067							}
2068							else {
2069	0					0	$i++;
2070							}
2071							}
2072	0					0	@singleoctet = ();
2073	0					0	for my $range (@singleoctet_ignorecase) {
2074	0	0				0	if (ref $range) {
2075	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
2076	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
2077							}
2078							elsif (scalar(@{$range}) == 2) {
2079	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2080							}
2081							else {
2082	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2083							}
2084							}
2085							}
2086							}
2087
2088							# return character list
2089	0	0				0	if (scalar(@multipleoctet) >= 1) {
2090	0	0				0	if (scalar(@singleoctet) >= 1) {
2091
2092							# any character other than multiple-octet and single octet character class
2093	0					0	return '(?!' . join('\|', @multipleoctet) . ')(?:[^' . join('', @singleoctet) . '])';
2094							}
2095							else {
2096
2097							# any character other than multiple-octet character class
2098	0					0	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
2099							}
2100							}
2101							else {
2102	0	0				0	if (scalar(@singleoctet) >= 1) {
2103
2104							# any character other than single octet character class
2105	0					0	return '(?:[^' . join('', @singleoctet) . '])';
2106							}
2107							else {
2108
2109							# any character
2110	0					0	return "(?:$your_char)";
2111							}
2112							}
2113							}
2114
2115							#
2116							# open file in read mode
2117							#
2118							sub _open_r {
2119	177			177		646	my(undef,$file) = @_;
2120	177					933	$file =~ s#\A (\s) #./$1#oxms;
2121	177		33			21247	return eval(q{open($_[0],'<',$_[1])}) \|\|
2122							open($_[0],"< $file\0");
2123							}
2124
2125							#
2126							# open file in write mode
2127							#
2128							sub _open_w {
2129	0			0		0	my(undef,$file) = @_;
2130	0					0	$file =~ s#\A (\s) #./$1#oxms;
2131	0		0			0	return eval(q{open($_[0],'>',$_[1])}) \|\|
2132							open($_[0],"> $file\0");
2133							}
2134
2135							#
2136							# open file in append mode
2137							#
2138							sub _open_a {
2139	0			0		0	my(undef,$file) = @_;
2140	0					0	$file =~ s#\A (\s) #./$1#oxms;
2141	0		0			0	return eval(q{open($_[0],'>>',$_[1])}) \|\|
2142							open($_[0],">> $file\0");
2143							}
2144
2145							#
2146							# safe system
2147							#
2148							sub _systemx {
2149
2150							# P.707 29.2.33. exec
2151							# in Chapter 29: Functions
2152							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2153							#
2154							# Be aware that in older releases of Perl, exec (and system) did not flush
2155							# your output buffer, so you needed to enable command buffering by setting $\|
2156							# on one or more filehandles to avoid lost output in the case of exec, or
2157							# misordererd output in the case of system. This situation was largely remedied
2158							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
2159
2160							# P.855 exec
2161							# in Chapter 27: Functions
2162							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2163							#
2164							# In very old release of Perl (before v5.6), exec (and system) did not flush
2165							# your output buffer, so you needed to enable command buffering by setting $\|
2166							# on one or more filehandles to avoid lost output with exec or misordered
2167							# output with system.
2168
2169	177			177		775	$\| = 1;
2170
2171							# P.565 23.1.2. Cleaning Up Your Environment
2172							# in Chapter 23: Security
2173							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2174
2175							# P.656 Cleaning Up Your Environment
2176							# in Chapter 20: Security
2177							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2178
2179							# local $ENV{'PATH'} = '.';
2180	177					2287	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
2181
2182							# P.707 29.2.33. exec
2183							# in Chapter 29: Functions
2184							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2185							#
2186							# As we mentioned earlier, exec treats a discrete list of arguments as an
2187							# indication that it should bypass shell processing. However, there is one
2188							# place where you might still get tripped up. The exec call (and system, too)
2189							# will not distinguish between a single scalar argument and an array containing
2190							# only one element.
2191							#
2192							# @args = ("echo surprise"); # just one element in list
2193							# exec @args # still subject to shell escapes
2194							# or die "exec: $!"; # because @args == 1
2195							#
2196							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
2197							# first argument as the pathname, which forces the rest of the arguments to be
2198							# interpreted as a list, even if there is only one of them:
2199							#
2200							# exec { $args[0] } @args # safe even with one-argument list
2201							# or die "can't exec @args: $!";
2202
2203							# P.855 exec
2204							# in Chapter 27: Functions
2205							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2206							#
2207							# As we mentioned earlier, exec treats a discrete list of arguments as a
2208							# directive to bypass shell processing. However, there is one place where
2209							# you might still get tripped up. The exec call (and system, too) cannot
2210							# distinguish between a single scalar argument and an array containing
2211							# only one element.
2212							#
2213							# @args = ("echo surprise"); # just one element in list
2214							# exec @args # still subject to shell escapes
2215							# \|\| die "exec: $!"; # because @args == 1
2216							#
2217							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
2218							# argument as the pathname, which forces the rest of the arguments to be
2219							# interpreted as a list, even if there is only one of them:
2220							#
2221							# exec { $args[0] } @args # safe even with one-argument list
2222							# \|\| die "can't exec @args: $!";
2223
2224	177					451	return CORE::system { $_[0] } @_; # safe even with one-argument list
	177					14743066
2225							}
2226
2227							#
2228							# Latin-4 order to character (with parameter)
2229							#
2230							sub Char::Elatin4::chr(;$) {
2231
2232	0	0		0	0		my $c = @_ ? $_[0] : $_;
2233
2234	0	0					if ($c == 0x00) {
2235	0						return "\x00";
2236							}
2237							else {
2238	0						my @chr = ();
2239	0						while ($c > 0) {
2240	0						unshift @chr, ($c % 0x100);
2241	0						$c = int($c / 0x100);
2242							}
2243	0						return pack 'C*', @chr;
2244							}
2245							}
2246
2247							#
2248							# Latin-4 order to character (without parameter)
2249							#
2250							sub Char::Elatin4::chr_() {
2251
2252	0			0	0		my $c = $_;
2253
2254	0	0					if ($c == 0x00) {
2255	0						return "\x00";
2256							}
2257							else {
2258	0						my @chr = ();
2259	0						while ($c > 0) {
2260	0						unshift @chr, ($c % 0x100);
2261	0						$c = int($c / 0x100);
2262							}
2263	0						return pack 'C*', @chr;
2264							}
2265							}
2266
2267							#
2268							# Latin-4 path globbing (with parameter)
2269							#
2270							sub Char::Elatin4::glob($) {
2271
2272	0	0		0	0		if (wantarray) {
2273	0						my @glob = _DOS_like_glob(@_);
2274	0						for my $glob (@glob) {
2275	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2276							}
2277	0						return @glob;
2278							}
2279							else {
2280	0						my $glob = _DOS_like_glob(@_);
2281	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2282	0						return $glob;
2283							}
2284							}
2285
2286							#
2287							# Latin-4 path globbing (without parameter)
2288							#
2289							sub Char::Elatin4::glob_() {
2290
2291	0	0		0	0		if (wantarray) {
2292	0						my @glob = _DOS_like_glob();
2293	0						for my $glob (@glob) {
2294	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2295							}
2296	0						return @glob;
2297							}
2298							else {
2299	0						my $glob = _DOS_like_glob();
2300	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2301	0						return $glob;
2302							}
2303							}
2304
2305							#
2306							# Latin-4 path globbing via File::DosGlob 1.10
2307							#
2308							# Often I confuse "_dosglob" and "_doglob".
2309							# So, I renamed "_dosglob" to "_DOS_like_glob".
2310							#
2311							my %iter;
2312							my %entries;
2313							sub _DOS_like_glob {
2314
2315							# context (keyed by second cxix argument provided by core)
2316	0			0			my($expr,$cxix) = @_;
2317
2318							# glob without args defaults to $_
2319	0	0					$expr = $_ if not defined $expr;
2320
2321							# represents the current user's home directory
2322							#
2323							# 7.3. Expanding Tildes in Filenames
2324							# in Chapter 7. File Access
2325							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2326							#
2327							# and File::HomeDir, File::HomeDir::Windows module
2328
2329							# DOS-like system
2330	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2331	0						$expr =~ s{ \A ~ (?= [^/\\] ) }
2332	0						{ my_home_MSWin32() }oxmse;
2333							}
2334
2335							# UNIX-like system
2336							else {
2337	0						$expr =~ s{ \A ~ ( (?:[^/])* ) }
2338	0	0	0				{ $1 ? (eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
2339							}
2340
2341							# assume global context if not provided one
2342	0	0					$cxix = '_G_' if not defined $cxix;
2343	0	0					$iter{$cxix} = 0 if not exists $iter{$cxix};
2344
2345							# if we're just beginning, do it all first
2346	0	0					if ($iter{$cxix} == 0) {
2347	0						$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
2348							}
2349
2350							# chuck it all out, quick or slow
2351	0	0					if (wantarray) {
2352	0						delete $iter{$cxix};
2353	0						return @{delete $entries{$cxix}};
	0
2354							}
2355							else {
2356	0	0					if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0
2357	0						return shift @{$entries{$cxix}};
	0
2358							}
2359							else {
2360							# return undef for EOL
2361	0						delete $iter{$cxix};
2362	0						delete $entries{$cxix};
2363	0						return undef;
2364							}
2365							}
2366							}
2367
2368							#
2369							# Latin-4 path globbing subroutine
2370							#
2371							sub _do_glob {
2372
2373	0			0			my($cond,@expr) = @_;
2374	0						my @glob = ();
2375	0						my $fix_drive_relative_paths = 0;
2376
2377							OUTER:
2378	0						for my $expr (@expr) {
2379	0	0					next OUTER if not defined $expr;
2380	0	0					next OUTER if $expr eq '';
2381
2382	0						my @matched = ();
2383	0						my @globdir = ();
2384	0						my $head = '.';
2385	0						my $pathsep = '/';
2386	0						my $tail;
2387
2388							# if argument is within quotes strip em and do no globbing
2389	0	0					if ($expr =~ /\A " ((?:$q_char)*) " \z/oxms) {
2390	0						$expr = $1;
2391	0	0					if ($cond eq 'd') {
2392	0	0					if (-d $expr) {
2393	0						push @glob, $expr;
2394							}
2395							}
2396							else {
2397	0	0					if (-e $expr) {
2398	0						push @glob, $expr;
2399							}
2400							}
2401	0						next OUTER;
2402							}
2403
2404							# wildcards with a drive prefix such as h:*.pm must be changed
2405							# to h:./*.pm to expand correctly
2406	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2407	0	0					if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
2408	0						$fix_drive_relative_paths = 1;
2409							}
2410							}
2411
2412	0	0					if (($head, $tail) = _parse_path($expr,$pathsep)) {
2413	0	0					if ($tail eq '') {
2414	0						push @glob, $expr;
2415	0						next OUTER;
2416							}
2417	0	0					if ($head =~ / \A (?:$q_char)? [?] /oxms) {
2418	0	0					if (@globdir = _do_glob('d', $head)) {
2419	0						push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0
2420	0						next OUTER;
2421							}
2422							}
2423	0	0	0				if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
2424	0						$head .= $pathsep;
2425							}
2426	0						$expr = $tail;
2427							}
2428
2429							# If file component has no wildcards, we can avoid opendir
2430	0	0					if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
2431	0	0					if ($head eq '.') {
2432	0						$head = '';
2433							}
2434	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2435	0						$head .= $pathsep;
2436							}
2437	0						$head .= $expr;
2438	0	0					if ($cond eq 'd') {
2439	0	0					if (-d $head) {
2440	0						push @glob, $head;
2441							}
2442							}
2443							else {
2444	0	0					if (-e $head) {
2445	0						push @glob, $head;
2446							}
2447							}
2448	0						next OUTER;
2449							}
2450	0	0					opendir(*DIR, $head) or next OUTER;
2451	0						my @leaf = readdir DIR;
2452	0						closedir DIR;
2453
2454	0	0					if ($head eq '.') {
2455	0						$head = '';
2456							}
2457	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2458	0						$head .= $pathsep;
2459							}
2460
2461	0						my $pattern = '';
2462	0						while ($expr =~ / \G ($q_char) /oxgc) {
2463	0						my $char = $1;
2464
2465							# 6.9. Matching Shell Globs as Regular Expressions
2466							# in Chapter 6. Pattern Matching
2467							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2468							# (and so on)
2469
2470	0	0					if ($char eq '*') {
		0
		0
2471	0						$pattern .= "(?:$your_char)*",
2472							}
2473							elsif ($char eq '?') {
2474	0						$pattern .= "(?:$your_char)?", # DOS style
2475							# $pattern .= "(?:$your_char)", # UNIX style
2476							}
2477							elsif ((my $fc = Char::Elatin4::fc($char)) ne $char) {
2478	0						$pattern .= $fc;
2479							}
2480							else {
2481	0						$pattern .= quotemeta $char;
2482							}
2483							}
2484	0			0			my $matchsub = sub { Char::Elatin4::fc($_[0]) =~ /\A $pattern \z/xms };
	0
2485
2486							# if ($@) {
2487							# print STDERR "$0: $@\n";
2488							# next OUTER;
2489							# }
2490
2491							INNER:
2492	0						for my $leaf (@leaf) {
2493	0	0	0				if ($leaf eq '.' or $leaf eq '..') {
2494	0						next INNER;
2495							}
2496	0	0	0				if ($cond eq 'd' and not -d "$head$leaf") {
2497	0						next INNER;
2498							}
2499
2500	0	0					if (&$matchsub($leaf)) {
2501	0						push @matched, "$head$leaf";
2502	0						next INNER;
2503							}
2504
2505							# [DOS compatibility special case]
2506							# Failed, add a trailing dot and try again, but only...
2507
2508	0	0	0				if (Char::Elatin4::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
2509							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
2510							Char::Elatin4::index($pattern,'\\.') != -1 # pattern has a dot.
2511							) {
2512	0	0					if (&$matchsub("$leaf.")) {
2513	0						push @matched, "$head$leaf";
2514	0						next INNER;
2515							}
2516							}
2517							}
2518	0	0					if (@matched) {
2519	0						push @glob, @matched;
2520							}
2521							}
2522	0	0					if ($fix_drive_relative_paths) {
2523	0						for my $glob (@glob) {
2524	0						$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
2525							}
2526							}
2527	0						return @glob;
2528							}
2529
2530							#
2531							# Latin-4 parse line
2532							#
2533							sub _parse_line {
2534
2535	0			0			my($line) = @_;
2536
2537	0						$line .= ' ';
2538	0						my @piece = ();
2539	0						while ($line =~ /
2540							" ( (?: [^"] )* ) " \s+ \|
2541							( (?: [^"\s] )* ) \s+
2542							/oxmsg
2543							) {
2544	0	0					push @piece, defined($1) ? $1 : $2;
2545							}
2546	0						return @piece;
2547							}
2548
2549							#
2550							# Latin-4 parse path
2551							#
2552							sub _parse_path {
2553
2554	0			0			my($path,$pathsep) = @_;
2555
2556	0						$path .= '/';
2557	0						my @subpath = ();
2558	0						while ($path =~ /
2559							((?: [^\/\\] )+?) [\/\\]
2560							/oxmsg
2561							) {
2562	0						push @subpath, $1;
2563							}
2564
2565	0						my $tail = pop @subpath;
2566	0						my $head = join $pathsep, @subpath;
2567	0						return $head, $tail;
2568							}
2569
2570							#
2571							# via File::HomeDir::Windows 1.00
2572							#
2573							sub my_home_MSWin32 {
2574
2575							# A lot of unix people and unix-derived tools rely on
2576							# the ability to overload HOME. We will support it too
2577							# so that they can replace raw HOME calls with File::HomeDir.
2578	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
2579	0						return $ENV{'HOME'};
2580							}
2581
2582							# Do we have a user profile?
2583							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
2584	0						return $ENV{'USERPROFILE'};
2585							}
2586
2587							# Some Windows use something like $ENV{'HOME'}
2588							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
2589	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
2590							}
2591
2592	0						return undef;
2593							}
2594
2595							#
2596							# via File::HomeDir::Unix 1.00
2597							#
2598							sub my_home {
2599	0			0	0		my $home;
2600
2601	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
2602	0						$home = $ENV{'HOME'};
2603							}
2604
2605							# This is from the original code, but I'm guessing
2606							# it means "login directory" and exists on some Unixes.
2607							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
2608	0						$home = $ENV{'LOGDIR'};
2609							}
2610
2611							### More-desperate methods
2612
2613							# Light desperation on any (Unixish) platform
2614							else {
2615	0						$home = eval q{ (getpwuid($<))[7] };
2616							}
2617
2618							# On Unix in general, a non-existant home means "no home"
2619							# For example, "nobody"-like users might use /nonexistant
2620	0	0	0				if (defined $home and ! -d($home)) {
2621	0						$home = undef;
2622							}
2623	0						return $home;
2624							}
2625
2626							#
2627							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
2628							#
2629							sub Char::Elatin4::PREMATCH {
2630	0			0	0		return $`;
2631							}
2632
2633							#
2634							# ${^MATCH}, $MATCH, $& the string that matched
2635							#
2636							sub Char::Elatin4::MATCH {
2637	0			0	0		return $&;
2638							}
2639
2640							#
2641							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
2642							#
2643							sub Char::Elatin4::POSTMATCH {
2644	0			0	0		return $';
2645							}
2646
2647							#
2648							# Latin-4 character to order (with parameter)
2649							#
2650							sub Char::Latin4::ord(;$) {
2651
2652	0	0		0	1		local $_ = shift if @_;
2653
2654	0	0					if (/\A ($q_char) /oxms) {
2655	0						my @ord = unpack 'C*', $1;
2656	0						my $ord = 0;
2657	0						while (my $o = shift @ord) {
2658	0						$ord = $ord * 0x100 + $o;
2659							}
2660	0						return $ord;
2661							}
2662							else {
2663	0						return CORE::ord $_;
2664							}
2665							}
2666
2667							#
2668							# Latin-4 character to order (without parameter)
2669							#
2670							sub Char::Latin4::ord_() {
2671
2672	0	0		0	0		if (/\A ($q_char) /oxms) {
2673	0						my @ord = unpack 'C*', $1;
2674	0						my $ord = 0;
2675	0						while (my $o = shift @ord) {
2676	0						$ord = $ord * 0x100 + $o;
2677							}
2678	0						return $ord;
2679							}
2680							else {
2681	0						return CORE::ord $_;
2682							}
2683							}
2684
2685							#
2686							# Latin-4 reverse
2687							#
2688							sub Char::Latin4::reverse(@) {
2689
2690	0	0		0	0		if (wantarray) {
2691	0						return CORE::reverse @_;
2692							}
2693							else {
2694
2695							# One of us once cornered Larry in an elevator and asked him what
2696							# problem he was solving with this, but he looked as far off into
2697							# the distance as he could in an elevator and said, "It seemed like
2698							# a good idea at the time."
2699
2700	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
2701							}
2702							}
2703
2704							#
2705							# Latin-4 getc (with parameter, without parameter)
2706							#
2707							sub Char::Latin4::getc(;*@) {
2708
2709	0			0	0		my($package) = caller;
2710	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
2711	0	0	0				croak 'Too many arguments for Char::Latin4::getc' if @_ and not wantarray;
2712
2713	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
2714	0						my $getc = '';
2715	0						for my $length ($length[0] .. $length[-1]) {
2716	0						$getc .= CORE::getc($fh);
2717	0	0					if (exists $range_tr{CORE::length($getc)}) {
2718	0	0					if ($getc =~ /\A ${Char::Elatin4::dot_s} \z/oxms) {
2719	0	0					return wantarray ? ($getc,@_) : $getc;
2720							}
2721							}
2722							}
2723	0	0					return wantarray ? ($getc,@_) : $getc;
2724							}
2725
2726							#
2727							# Latin-4 length by character
2728							#
2729							sub Char::Latin4::length(;$) {
2730
2731	0	0		0	1		local $_ = shift if @_;
2732
2733	0						local @_ = /\G ($q_char) /oxmsg;
2734	0						return scalar @_;
2735							}
2736
2737							#
2738							# Latin-4 substr by character
2739							#
2740							BEGIN {
2741
2742							# P.232 The lvalue Attribute
2743							# in Chapter 6: Subroutines
2744							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2745
2746							# P.336 The lvalue Attribute
2747							# in Chapter 7: Subroutines
2748							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2749
2750							# P.144 8.4 Lvalue subroutines
2751							# in Chapter 8: perlsub: Perl subroutines
2752							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2753
2754	177	50	0	177	1	403573	eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
2755							# vv----------------*******
2756							sub Char::Latin4::substr($$;$$) %s {
2757
2758							my @char = $_[0] =~ /\G ($q_char) /oxmsg;
2759
2760							# If the substring is beyond either end of the string, substr() returns the undefined
2761							# value and produces a warning. When used as an lvalue, specifying a substring that
2762							# is entirely outside the string raises an exception.
2763							# http://perldoc.perl.org/functions/substr.html
2764
2765							# A return with no argument returns the scalar value undef in scalar context,
2766							# an empty list () in list context, and (naturally) nothing at all in void
2767							# context.
2768
2769							my $offset = $_[1];
2770							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
2771							return;
2772							}
2773
2774							# substr($string,$offset,$length,$replacement)
2775							if (@_ == 4) {
2776							my(undef,undef,$length,$replacement) = @_;
2777							my $substr = join '', splice(@char, $offset, $length, $replacement);
2778							$_[0] = join '', @char;
2779
2780							# return $substr; this doesn't work, don't say "return"
2781							$substr;
2782							}
2783
2784							# substr($string,$offset,$length)
2785							elsif (@_ == 3) {
2786							my(undef,undef,$length) = @_;
2787							my $octet_offset = 0;
2788							my $octet_length = 0;
2789							if ($offset == 0) {
2790							$octet_offset = 0;
2791							}
2792							elsif ($offset > 0) {
2793							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2794							}
2795							else {
2796							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2797							}
2798							if ($length == 0) {
2799							$octet_length = 0;
2800							}
2801							elsif ($length > 0) {
2802							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
2803							}
2804							else {
2805							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
2806							}
2807							CORE::substr($_[0], $octet_offset, $octet_length);
2808							}
2809
2810							# substr($string,$offset)
2811							else {
2812							my $octet_offset = 0;
2813							if ($offset == 0) {
2814							$octet_offset = 0;
2815							}
2816							elsif ($offset > 0) {
2817							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2818							}
2819							else {
2820							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2821							}
2822							CORE::substr($_[0], $octet_offset);
2823							}
2824							}
2825							END
2826							}
2827
2828							#
2829							# Latin-4 index by character
2830							#
2831							sub Char::Latin4::index($$;$) {
2832
2833	0			0	1		my $index;
2834	0	0					if (@_ == 3) {
2835	0						$index = Char::Elatin4::index($_[0], $_[1], CORE::length(Char::Latin4::substr($_[0], 0, $_[2])));
2836							}
2837							else {
2838	0						$index = Char::Elatin4::index($_[0], $_[1]);
2839							}
2840
2841	0	0					if ($index == -1) {
2842	0						return -1;
2843							}
2844							else {
2845	0						return Char::Latin4::length(CORE::substr $_[0], 0, $index);
2846							}
2847							}
2848
2849							#
2850							# Latin-4 rindex by character
2851							#
2852							sub Char::Latin4::rindex($$;$) {
2853
2854	0			0	1		my $rindex;
2855	0	0					if (@_ == 3) {
2856	0						$rindex = Char::Elatin4::rindex($_[0], $_[1], CORE::length(Char::Latin4::substr($_[0], 0, $_[2])));
2857							}
2858							else {
2859	0						$rindex = Char::Elatin4::rindex($_[0], $_[1]);
2860							}
2861
2862	0	0					if ($rindex == -1) {
2863	0						return -1;
2864							}
2865							else {
2866	0						return Char::Latin4::length(CORE::substr $_[0], 0, $rindex);
2867							}
2868							}
2869
2870							#
2871							# instead of Carp::carp
2872							#
2873							sub carp {
2874	0			0	0		my($package,$filename,$line) = caller(1);
2875	0						print STDERR "@_ at $filename line $line.\n";
2876							}
2877
2878							#
2879							# instead of Carp::croak
2880							#
2881							sub croak {
2882	0			0	0		my($package,$filename,$line) = caller(1);
2883	0						print STDERR "@_ at $filename line $line.\n";
2884	0						die "\n";
2885							}
2886
2887							#
2888							# instead of Carp::cluck
2889							#
2890							sub cluck {
2891	0			0	0		my $i = 0;
2892	0						my @cluck = ();
2893	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2894	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
2895	0						$i++;
2896							}
2897	0						print STDERR CORE::reverse @cluck;
2898	0						print STDERR "\n";
2899	0						carp @_;
2900							}
2901
2902							#
2903							# instead of Carp::confess
2904							#
2905							sub confess {
2906	0			0	0		my $i = 0;
2907	0						my @confess = ();
2908	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2909	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
2910	0						$i++;
2911							}
2912	0						print STDERR CORE::reverse @confess;
2913	0						print STDERR "\n";
2914	0						croak @_;
2915							}
2916
2917							1;
2918
2919							__END__