File Coverage

blib/lib/UTF2.pm
Criterion Covered Total %
statement 64 124 51.6
branch 22 70 31.4
condition 0 3 0.0
subroutine 9 11 81.8
pod 0 2 0.0
total 95 210 45.2


line stmt bran cond sub pod time code
1             package UTF2;
2 306     306   231950 use strict;
  306         3227  
  306         10494  
3             ######################################################################
4             #
5             # UTF2 - Source code filter to escape UTF-8 script
6             #
7             # http://search.cpan.org/dist/Char-UTF2/
8             #
9             # Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2018, 2019 INABA Hitoshi
10             ######################################################################
11              
12 306     306   6121 use 5.00503; # Galapagos Consensus 1998 for primetools
  306         940  
13             # use 5.008001; # Lancaster Consensus 2013 for toolchains
14              
15             # 12.3. Delaying use Until Runtime
16             # in Chapter 12. Packages, Libraries, and Modules
17             # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
18             # (and so on)
19              
20             # Version numbers should be boring
21             # http://www.dagolden.com/index.php/369/version-numbers-should-be-boring/
22             # For the impatient, the disinterested or those who just want to follow
23             # a recipe, my advice for all modules is this:
24             # our $VERSION = "0.001"; # or "0.001_001" for a dev release
25             # $VERSION = CORE::eval $VERSION; # No!! because '1.10' makes '1.1'
26              
27 306     306   2128 use vars qw($VERSION);
  306         643  
  306         51974  
28             $VERSION = '1.13';
29             $VERSION = $VERSION;
30              
31             BEGIN {
32 306 50   306   2172 if ($^X =~ / jperl /oxmsi) {
33 0         0 die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)\n";
34             }
35 306         647 if (CORE::ord('A') == 193) {
36             die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).\n";
37             }
38 306         21284 if (CORE::ord('A') != 0x41) {
39             die __FILE__, ": is not US-ASCII script (must be US-ASCII script).\n";
40             }
41             }
42              
43             BEGIN {
44 306     306   3009 (my $dirname = __FILE__) =~ s{^(.+)/[^/]*$}{$1};
45 306         929 unshift @INC, $dirname;
46 306         651734 CORE::require Eutf2;
47             }
48              
49             # instead of Symbol.pm
50 918         2189 BEGIN {
51             sub gensym () {
52 918     918 0 4670 return \do { local *_ };
  306         21664  
53             }
54             }
55              
56             # P.714 29.2.39. flock
57             # in Chapter 29: Functions
58             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
59              
60             # P.863 flock
61             # in Chapter 27: Functions
62             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
63              
64             # P.228 Inlining Constant Functions
65             # in Chapter 6: Subroutines
66             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
67              
68             # P.331 Inlining Constant Functions
69             # in Chapter 7: Subroutines
70             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
71              
72             sub LOCK_SH() {1}
73             sub LOCK_EX() {2}
74             sub LOCK_UN() {8}
75             sub LOCK_NB() {4}
76              
77       0     sub unimport {}
78             sub UTF2::escape_script;
79              
80             # 6.18. Matching Multiple-Byte Characters
81             # in Chapter 6. Pattern Matching
82             # of ISBN 978-1-56592-243-3 Perl Cookbook.
83             # (and so on)
84              
85             # regexp of character
86             my $qq_char = qr/(?> \\c[\x40-\x5F] | \\? (?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] | [\x00-\xFF]) )/oxms;
87             my $q_char = qr/(?> (?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] | [\x00-\xFF] )/oxms;
88              
89             # when this script is main program
90             if ($0 eq __FILE__) {
91              
92             # show usage
93             unless (@ARGV) {
94             die <<END;
95             $0: usage
96              
97             perl $0 UTF-8_script.pl > Escaped_script.pl.e
98             END
99             }
100              
101             print UTF2::escape_script($ARGV[0]);
102             exit 0;
103             }
104              
105             my($package,$filename,$line,$subroutine,$hasargs,$wantarray,$evaltext,$is_require,$hints,$bitmask) = caller 0;
106              
107             # called any package not main
108             if ($package ne 'main') {
109             die <<END;
110             @{[__FILE__]}: escape by manually command '$^X @{[__FILE__]} "$filename" > "@{[__PACKAGE__]}::$filename"'
111             and rewrite "use $package;" to "use @{[__PACKAGE__]}::$package;" of script "$0".
112             END
113             }
114              
115             # P.302 Module Privacy and the Exporter
116             # in Chapter 11: Modules
117             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
118             #
119             # A module can do anything it jolly well pleases when it's used, since use just
120             # calls the ordinary import method for the module, and you can define that
121             # method to do anything you like.
122              
123             # P.406 Module Privacy and the Exporter
124             # in Chapter 11: Modules
125             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
126             #
127             # A module can do anything it jolly well pleases when it's used, since use just
128             # calls the ordinary import method for the module, and you can define that
129             # method to do anything you like.
130              
131             sub import {
132              
133 0 50   306   0 if (-e("$filename.e")) {
134 0 0       0 if (exists $ENV{'CHAR_DEBUG'}) {
    0          
135 0         0 unlink "$filename.e";
136             }
137             elsif (-z("$filename.e")) {
138 0         0 unlink "$filename.e";
139             }
140             else {
141              
142             #----------------------------------------------------
143             # older >
144             # newer >>>>>
145             #----------------------------------------------------
146             # Filter >
147             # Source >>>>>
148             # Escape >>> needs re-escape (Source was changed)
149             #
150             # Filter >>>
151             # Source >>>>>
152             # Escape > needs re-escape (Source was changed)
153             #
154             # Filter >>>>>
155             # Source >>>
156             # Escape > needs re-escape (Source was changed)
157             #
158             # Filter >>>>>
159             # Source >
160             # Escape >>> needs re-escape (Filter was changed)
161             #
162             # Filter >
163             # Source >>>
164             # Escape >>>>> executable without re-escape
165             #
166             # Filter >>>
167             # Source >
168             # Escape >>>>> executable without re-escape
169             #----------------------------------------------------
170              
171 0         0 my $mtime_filter = (stat(__FILE__ ))[9];
172 0         0 my $mtime_source = (stat($filename ))[9];
173 0         0 my $mtime_escape = (stat("$filename.e"))[9];
174 0 0 0     0 if (($mtime_escape < $mtime_source) or ($mtime_escape < $mtime_filter)) {
175 306         3635 unlink "$filename.e";
176             }
177             }
178             }
179              
180 306 50       1028 if (not -e("$filename.e")) {
181 306         1696 my $fh = gensym();
182 306 50       2777 Eutf2::_open_a($fh, "$filename.e") or die __FILE__, ": Can't write open file: $filename.e\n";
183              
184             # 7.19. Flushing Output
185             # in Chapter 7. File Access
186             # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
187              
188 306         1798 select((select($fh), $|=1)[0]);
189              
190 0 50       0 if (0) {
191             }
192 0         0 elsif (exists $ENV{'CHAR_NONBLOCK'}) {
193              
194             # P.419 File Locking
195             # in Chapter 16: Interprocess Communication
196             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
197              
198             # P.524 File Locking
199             # in Chapter 15: Interprocess Communication
200             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
201              
202             # P.571 Handling Race Conditions
203             # in Chapter 23: Security
204             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
205              
206             # P.663 Handling Race Conditions
207             # in Chapter 20: Security
208             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
209              
210             # (and so on)
211              
212 0         0 CORE::eval q{ flock($fh, LOCK_EX | LOCK_NB) };
213 0 0       0 if ($@) {
214 306         19487 die __FILE__, ": Can't immediately write-lock the file: $filename.e\n";
215             }
216             }
217             else {
218 306         13711 CORE::eval q{ flock($fh, LOCK_EX) };
219             }
220              
221 306         3907 CORE::eval q{ truncate($fh, 0) };
222 306 50       7898 seek($fh, 0, 0) or die __FILE__, ": Can't seek file: $filename.e\n";
223              
224 306         812 my $e_script = UTF2::escape_script($filename);
225 306         29144 print {$fh} $e_script;
  306         9812  
226              
227 306         8346 my $mode = (stat($filename))[2] & 0777;
228 306         54830 chmod $mode, "$filename.e";
229              
230 306 50       2035 close($fh) or die __FILE__, ": Can't close file: $filename.e\n";
231             }
232              
233 306         2341 my $fh = gensym();
234 306 50       2060 Eutf2::_open_r($fh, "$filename.e") or die __FILE__, ": Can't read open file: $filename.e\n";
235              
236 0 50       0 if (0) {
237             }
238 0         0 elsif (exists $ENV{'CHAR_NONBLOCK'}) {
239 0         0 CORE::eval q{ flock($fh, LOCK_SH | LOCK_NB) };
240 0 0       0 if ($@) {
241 306         24811 die __FILE__, ": Can't immediately read-lock the file: $filename.e\n";
242             }
243             }
244             else {
245 306         1605 CORE::eval q{ flock($fh, LOCK_SH) };
246             }
247              
248 306         1808 my @switch = ();
249 0 50       0 if ($^W) {
250 306         1320 push @switch, '-w';
251             }
252 0 50       0 if (defined $^I) {
253 0         0 push @switch, '-i' . $^I;
254 306         627 undef $^I;
255             }
256              
257             # P.707 29.2.33. exec
258             # in Chapter 29: Functions
259             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
260             #
261             # If there is more than one argument in LIST, or if LIST is an array with more
262             # than one value, the system shell will never be used. This also bypasses any
263             # shell processing of the command. The presence or absence of metacharacters in
264             # the arguments doesn't affect this list-triggered behavior, which makes it the
265             # preferred form in security-conscious programs that do not wish to expose
266             # themselves to potential shell escapes.
267             # Environment variable PERL5SHELL(Microsoft ports only) will never be used, too.
268              
269             # P.855 exec
270             # in Chapter 27: Functions
271             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
272             #
273             # If there is more than one argument in LIST, or if LIST is an array with more
274             # than one value, the system shell will never be used. This also bypasses any
275             # shell processing of the command. The presence or absence of metacharacters in
276             # the arguments doesn't affect this list-triggered behavior, which makes it the
277             # preferred form in security-conscious programs that do not wish to expose
278             # themselves to injection attacks via shell escapes.
279             # Environment variable PERL5SHELL(Microsoft ports only) will never be used, too.
280              
281             # P.489 #! and Quoting on Non-Unix Systems
282             # in Chapter 19: The Command-Line Interface
283             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
284              
285             # P.578 #! and Quoting on Non-Unix Systems
286             # in Chapter 17: The Command-Line Interface
287             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
288              
289 306         2047 my $system = 0;
290              
291             # DOS-like system
292 0 50       0 if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
293             $system = Eutf2::_systemx(
294             _escapeshellcmd_MSWin32($^X),
295              
296             # -I switch can not treat space included path
297             # (map { '-I' . _escapeshellcmd_MSWin32($_) } @INC),
298 0         0 (map { '-I' . $_ } @INC),
299              
300             @switch,
301             '--',
302 0         0 map { _escapeshellcmd_MSWin32($_) } "$filename.e", @ARGV
  306         1613  
303             );
304             }
305              
306             # UNIX-like system
307             else {
308             $system = Eutf2::_systemx(
309             _escapeshellcmd($^X),
310 306         914 (map { '-I' . _escapeshellcmd($_) } @INC),
311             @switch,
312             '--',
313 3672         4953 map { _escapeshellcmd($_) } "$filename.e", @ARGV
  306         552736  
314             );
315             }
316              
317             # exit with actual exit value
318 0         0 exit($system >> 8);
319             }
320              
321             # escape shell command line on DOS-like system
322             sub _escapeshellcmd_MSWin32 {
323 0     0   0 my($word) = @_;
324 0 0       0 if ($word =~ / [ ] /oxms) {
325 0         0 return qq{"$word"};
326             }
327             else {
328 4284         6619 return $word;
329             }
330             }
331              
332             # escape shell command line on UNIX-like system
333             sub _escapeshellcmd {
334 4284     4284   10988 my($word) = @_;
335 306         892 return $word;
336             }
337              
338             # P.619 Source Filters
339             # in Chapter 24: Common Practices
340             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
341              
342             # P.718 Source Filters
343             # in Chapter 21: Common Practices
344             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
345              
346             # escape UTF-8 script
347             sub UTF2::escape_script {
348 306     306 0 657 my($script) = @_;
349 306         626 my $e_script = '';
350              
351             # read UTF-8 script
352 306         1552 my $fh = gensym();
353 306 50       2022 Eutf2::_open_r($fh, $script) or die __FILE__, ": Can't open file: $script\n";
354 306         9443 local $/ = undef; # slurp mode
355 306         5766 $_ = <$fh>;
356 306 50       1933 close($fh) or die __FILE__, ": Can't close file: $script\n";
357              
358 0 50       0 if (/^ use Eutf2(?:(?>\s+)(?>[0-9\.]*))?(?>\s*); $/oxms) {
359 306         992 return $_;
360             }
361             else {
362              
363             # #! shebang line
364 0 50       0 if (s/\A(#!.+?\n)//oms) {
365 0         0 my $head = $1;
366 0         0 $head =~ s/\bjperl\b/perl/gi;
367 306         734 $e_script .= $head;
368             }
369              
370             # DOS-like system header
371 0 50       0 if (s/\A(\@rem(?>\s*)=(?>\s*)'.*?'(?>\s*);\s*\n)//oms) {
372 0         0 my $head = $1;
373 0         0 $head =~ s/\bjperl\b/perl/gi;
374 306         18551 $e_script .= $head;
375             }
376              
377             # P.618 Generating Perl in Other Languages
378             # in Chapter 24: Common Practices
379             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
380              
381             # P.717 Generating Perl in Other Languages
382             # in Chapter 21: Common Practices
383             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
384              
385 0 50       0 if (s/(.*^#(?>\s*)line(?>\s+)(?>[0-9]+)(?:(?>\s+)"(?:$q_char)+?")?\s*\n)//oms) {
386 0         0 my $head = $1;
387 0         0 $head =~ s/\bjperl\b/perl/gi;
388 306         2062 $e_script .= $head;
389             }
390              
391             # P.210 5.10.3.3. Match-time code evaluation
392             # in Chapter 5: Pattern Matching
393             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
394              
395             # P.255 Match-time code evaluation
396             # in Chapter 5: Pattern Matching
397             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
398              
399             # '...' quote to avoid "Octal number in vector unsupported" on perl 5.6
400              
401 306         3869 $e_script .= sprintf("use Eutf2 '%s.0'; # 'quote' for perl5.6\n", $UTF2::VERSION); # require run-time routines version
402              
403             # use UTF2 version qw(ord reverse getc);
404 306 50       864 if (s/^ (?>\s*) use (?>\s+) (?: Char | UTF2 ) (?>\s*) ([^\x80-\xFF;]*) ; \s* \n? $//oxms) {
405              
406             # require version
407 306         1064 my $list = $1;
408 0 50       0 if ($list =~ s/\A ((?>[0-9]+)\.(?>[0-9]+)) \.0 (?>\s*) //oxms) {
    50          
409 0         0 my $version = $1;
410 0 0       0 if ($version ne $UTF2::VERSION) {
411 0         0 my @file = grep -e, map {qq{$_/UTF2.pm}} @INC;
  0         0  
412 0         0 my %file = map { $_ => 1 } @file;
  0         0  
413 0 0       0 if (scalar(keys %file) >= 2) {
414 0         0 my $file = join "\n", sort keys %file;
415 0         0 warn <<END;
416             ****************************************************
417             C A U T I O N
418              
419             CONFLICT UTF2.pm FILE
420              
421             $file
422             ****************************************************
423              
424             END
425             }
426 0         0 die "Script $0 expects UTF2.pm $version, but @{[__FILE__]} is version $UTF2::VERSION\n";
  0         0  
427             }
428 0         0 $e_script .= qq{die "Script \$0 expects Eutf2.pm $version, but \\\$Eutf2::VERSION is \$Eutf2::VERSION" if \$Eutf2::VERSION ne '$version';\n};
429             }
430             elsif ($list =~ s/\A ((?>[0-9]+)(?>\.[0-9]*)) (?>\s*) //oxms) {
431 0         0 my $version = $1;
432 0 0       0 if ($version > $UTF2::VERSION) {
433 0         0 die "Script $0 required UTF2.pm $version, but @{[__FILE__]} is only version $UTF2::VERSION\n";
  306         1523  
434             }
435             }
436              
437             # demand ord, reverse, and getc
438 0 50       0 if ($list !~ /\A (?>\s*) \z/oxms) {
439 0         0 local $@;
440 0         0 my @list = CORE::eval $list;
441 0         0 for (@list) {
442 0 0       0 $Eutf2::function_ord = 'UTF2::ord' if /\A ord \z/oxms;
443 0 0       0 $Eutf2::function_ord_ = 'UTF2::ord_' if /\A ord \z/oxms;
444 0 0       0 $Eutf2::function_reverse = 'UTF2::reverse' if /\A reverse \z/oxms;
445 306 0       1175 $Eutf2::function_getc = 'UTF2::getc' if /\A getc \z/oxms;
446             }
447             }
448             }
449             }
450              
451 306         3046 $e_script .= UTF2::escape();
452              
453             return $e_script;
454             }
455              
456             1;
457              
458             __END__