File Coverage

blib/lib/UTF2.pm
Criterion Covered Total %
statement 68 134 50.7
branch 24 74 32.4
condition 0 3 0.0
subroutine 11 13 84.6
pod 0 2 0.0
total 103 226 45.5


line stmt bran cond sub pod time code
1             package UTF2;
2 308     308   189514 use strict;
  308         2023  
  308         12824  
3 308 50   308   14640 BEGIN { $INC{'warnings.pm'} = '' if $] < 5.006 } use warnings;
  308     308   1461  
  308         495  
  308         8874  
4             ######################################################################
5             #
6             # UTF2 - Source code filter to escape UTF-8 script
7             #
8             # http://search.cpan.org/dist/Char-UTF2/
9             #
10             # Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2018, 2019 INABA Hitoshi
11             ######################################################################
12              
13 308     308   7381 use 5.00503; # Galapagos Consensus 1998 for primetools
  308         894  
14             # use 5.008001; # Lancaster Consensus 2013 for toolchains
15              
16             # 12.3. Delaying use Until Runtime
17             # in Chapter 12. Packages, Libraries, and Modules
18             # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
19             # (and so on)
20              
21             # Version numbers should be boring
22             # http://www.dagolden.com/index.php/369/version-numbers-should-be-boring/
23             # For the impatient, the disinterested or those who just want to follow
24             # a recipe, my advice for all modules is this:
25             # our $VERSION = "0.001"; # or "0.001_001" for a dev release
26             # $VERSION = CORE::eval $VERSION; # No!! because '1.10' makes '1.1'
27              
28 308     308   1742 use vars qw($VERSION);
  308         503  
  308         45039  
29             $VERSION = '1.22';
30             $VERSION = $VERSION;
31              
32             BEGIN {
33 308 50   308   2065 if ($^X =~ / jperl /oxmsi) {
34 0         0 die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)\n";
35             }
36 308         483 if (CORE::ord('A') == 193) {
37             die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).\n";
38             }
39 308         22291 if (CORE::ord('A') != 0x41) {
40             die __FILE__, ": is not US-ASCII script (must be US-ASCII script).\n";
41             }
42             }
43              
44             BEGIN {
45 308     308   2949 (my $dirname = __FILE__) =~ s{^(.+)/[^/]*$}{$1};
46 308         1150 unshift @INC, $dirname;
47 308         509651 CORE::require Eutf2;
48             }
49              
50             # instead of Symbol.pm
51 924         5793 BEGIN {
52             sub gensym () {
53 0 50   924 0 0 if ($] < 5.006) {
54 0         0 return \do { local *_ };
  924         2314  
55             }
56             else {
57 308         18243 return undef;
58             }
59             }
60             }
61              
62             # P.714 29.2.39. flock
63             # in Chapter 29: Functions
64             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
65              
66             # P.863 flock
67             # in Chapter 27: Functions
68             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
69              
70             # P.228 Inlining Constant Functions
71             # in Chapter 6: Subroutines
72             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
73              
74             # P.331 Inlining Constant Functions
75             # in Chapter 7: Subroutines
76             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
77              
78             sub LOCK_SH() {1}
79             sub LOCK_EX() {2}
80             sub LOCK_UN() {8}
81             sub LOCK_NB() {4}
82              
83       0     sub unimport {}
84             sub UTF2::escape_script;
85              
86             # 6.18. Matching Multiple-Byte Characters
87             # in Chapter 6. Pattern Matching
88             # of ISBN 978-1-56592-243-3 Perl Cookbook.
89             # (and so on)
90              
91             # regexp of character
92             my $qq_char = qr/(?> \\c[\x40-\x5F] | \\? (?:(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] | [\x00-\xFF]) )/oxms;
93             my $q_char = qr/(?> (?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] | [\x00-\xFF] )/oxms;
94              
95             # when this script is main program
96             if ($0 eq __FILE__) {
97              
98             # show usage
99             unless (@ARGV) {
100             die <<END;
101             $0: usage
102              
103             perl $0 UTF-8_script.pl > Escaped_script.pl.e
104             END
105             }
106              
107             print UTF2::escape_script($ARGV[0]);
108             exit 0;
109             }
110              
111             my($package,$filename,$line,$subroutine,$hasargs,$wantarray,$evaltext,$is_require,$hints,$bitmask) = caller 0;
112              
113             # called from a package other than main
114             if ($package ne 'main') {
115             die <<END;
116             @{[__FILE__]}: escape by manually command '$^X @{[__FILE__]} "$filename" > "@{[__PACKAGE__]}::$filename"'
117             and rewrite "use $package;" to "use @{[__PACKAGE__]}::$package;" of script "$0".
118             END
119             }
120              
121             # P.302 Module Privacy and the Exporter
122             # in Chapter 11: Modules
123             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
124             #
125             # A module can do anything it jolly well pleases when it's used, since use just
126             # calls the ordinary import method for the module, and you can define that
127             # method to do anything you like.
128              
129             # P.406 Module Privacy and the Exporter
130             # in Chapter 11: Modules
131             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
132             #
133             # A module can do anything it jolly well pleases when it's used, since use just
134             # calls the ordinary import method for the module, and you can define that
135             # method to do anything you like.
136              
137             sub import {
138              
139 0 50   308   0 if (-e("$filename.e")) {
140 0 0       0 if (exists $ENV{'CHAR_DEBUG'}) {
    0          
141 0         0 unlink "$filename.e";
142             }
143             elsif (-z("$filename.e")) {
144 0         0 unlink "$filename.e";
145             }
146             else {
147              
148             #----------------------------------------------------
149             # older >
150             # newer >>>>>
151             #----------------------------------------------------
152             # Filter >
153             # Source >>>>>
154             # Escape >>> needs re-escape (Source was changed)
155             #
156             # Filter >>>
157             # Source >>>>>
158             # Escape > needs re-escape (Source was changed)
159             #
160             # Filter >>>>>
161             # Source >>>
162             # Escape > needs re-escape (Source was changed)
163             #
164             # Filter >>>>>
165             # Source >
166             # Escape >>> needs re-escape (Filter was changed)
167             #
168             # Filter >
169             # Source >>>
170             # Escape >>>>> executable without re-escape
171             #
172             # Filter >>>
173             # Source >
174             # Escape >>>>> executable without re-escape
175             #----------------------------------------------------
176              
177 0         0 my $mtime_filter = (stat(__FILE__ ))[9];
178 0         0 my $mtime_source = (stat($filename ))[9];
179 0         0 my $mtime_escape = (stat("$filename.e"))[9];
180 0 0 0     0 if (($mtime_escape < $mtime_source) or ($mtime_escape < $mtime_filter)) {
181 308         3145 unlink "$filename.e";
182             }
183             }
184             }
185              
186 308 50       1050 if (not -e("$filename.e")) {
187 308         1553 my $fh = gensym();
188 308 50       2853 Eutf2::_open_a($fh, "$filename.e") or die __FILE__, ": Can't write open file: $filename.e\n";
189              
190             # 7.19. Flushing Output
191             # in Chapter 7. File Access
192             # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
193              
194 308         1310 select((select($fh), $|=1)[0]);
195              
196 0 50       0 if (0) {
197             }
198 0         0 elsif (exists $ENV{'CHAR_NONBLOCK'}) {
199              
200             # P.419 File Locking
201             # in Chapter 16: Interprocess Communication
202             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
203              
204             # P.524 File Locking
205             # in Chapter 15: Interprocess Communication
206             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
207              
208             # P.571 Handling Race Conditions
209             # in Chapter 23: Security
210             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
211              
212             # P.663 Handling Race Conditions
213             # in Chapter 20: Security
214             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
215              
216             # (and so on)
217              
218 0         0 CORE::eval q{ flock($fh, LOCK_EX | LOCK_NB) };
219 0 0       0 if ($@) {
220 308         18882 die __FILE__, ": Can't immediately write-lock the file: $filename.e\n";
221             }
222             }
223             else {
224 308         12751 CORE::eval q{ flock($fh, LOCK_EX) };
225             }
226              
227 308         3760 CORE::eval q{ truncate($fh, 0) };
228 308 50       1064 seek($fh, 0, 0) or die __FILE__, ": Can't seek file: $filename.e\n";
229              
230 308         721 my $e_script = UTF2::escape_script($filename);
231 308         28464 print {$fh} $e_script;
  308         8550  
232              
233 308         10890 my $mode = (stat($filename))[2] & 0777;
234 308         61581 chmod $mode, "$filename.e";
235              
236 308 50       1715 close($fh) or die "Can't close file: $filename.e: $!";
237             }
238              
239 308         2152 my $fh = gensym();
240 308 50       1996 Eutf2::_open_r($fh, "$filename.e") or die __FILE__, ": Can't read open file: $filename.e\n";
241              
242 0 50       0 if (0) {
243             }
244 0         0 elsif (exists $ENV{'CHAR_NONBLOCK'}) {
245 0         0 CORE::eval q{ flock($fh, LOCK_SH | LOCK_NB) };
246 0 0       0 if ($@) {
247 308         23810 die __FILE__, ": Can't immediately read-lock the file: $filename.e\n";
248             }
249             }
250             else {
251 308         1497 CORE::eval q{ flock($fh, LOCK_SH) };
252             }
253              
254 308         1673 my @switch = ();
255 0 50       0 if ($^W) {
256 308         1381 push @switch, '-w';
257             }
258 0 50       0 if (defined $^I) {
259 0         0 push @switch, '-i' . $^I;
260 308         659 undef $^I;
261             }
262              
263             # P.707 29.2.33. exec
264             # in Chapter 29: Functions
265             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
266             #
267             # If there is more than one argument in LIST, or if LIST is an array with more
268             # than one value, the system shell will never be used. This also bypasses any
269             # shell processing of the command. The presence or absence of metacharacters in
270             # the arguments doesn't affect this list-triggered behavior, which makes it the
271             # preferred form in security-conscious programs that do not wish to expose
272             # themselves to potential shell escapes.
273             # Environment variable PERL5SHELL(Microsoft ports only) will never be used, too.
274              
275             # P.855 exec
276             # in Chapter 27: Functions
277             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
278             #
279             # If there is more than one argument in LIST, or if LIST is an array with more
280             # than one value, the system shell will never be used. This also bypasses any
281             # shell processing of the command. The presence or absence of metacharacters in
282             # the arguments doesn't affect this list-triggered behavior, which makes it the
283             # preferred form in security-conscious programs that do not wish to expose
284             # themselves to injection attacks via shell escapes.
285             # Environment variable PERL5SHELL(Microsoft ports only) will never be used, too.
286              
287             # P.489 #! and Quoting on Non-Unix Systems
288             # in Chapter 19: The Command-Line Interface
289             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
290              
291             # P.578 #! and Quoting on Non-Unix Systems
292             # in Chapter 17: The Command-Line Interface
293             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
294              
295 308         2248 my $system = 0;
296              
297             # DOS-like system
298 0 50       0 if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
299             $system = Eutf2::_systemx(
300             _escapeshellcmd_MSWin32($^X),
301              
302             # -I switch can not treat space included path
303             # (map { '-I' . _escapeshellcmd_MSWin32($_) } @INC),
304 0         0 (map { '-I' . $_ } @INC),
305              
306             @switch,
307             '--',
308 0         0 map { _escapeshellcmd_MSWin32($_) } "$filename.e", @ARGV
  308         1363  
309             );
310             }
311              
312             # UNIX-like system
313             else {
314             $system = Eutf2::_systemx(
315             _escapeshellcmd($^X),
316 308         942 (map { '-I' . _escapeshellcmd($_) } @INC),
317             @switch,
318             '--',
319 3696         5349 map { _escapeshellcmd($_) } "$filename.e", @ARGV
  308         535348  
320             );
321             }
322              
323             # exit with actual exit value
324 0         0 exit($system >> 8);
325             }
326              
327             # escape shell command line on DOS-like system
328             sub _escapeshellcmd_MSWin32 {
329 0     0   0 my($word) = @_;
330 0 0       0 if ($word =~ / [ ] /oxms) {
331 0         0 return qq{"$word"};
332             }
333             else {
334 4312         6296 return $word;
335             }
336             }
337              
338             # escape shell command line on UNIX-like system
339             sub _escapeshellcmd {
340 4312     4312   10489 my($word) = @_;
341 308         686 return $word;
342             }
343              
344             # P.619 Source Filters
345             # in Chapter 24: Common Practices
346             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
347              
348             # P.718 Source Filters
349             # in Chapter 21: Common Practices
350             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
351              
352             # escape UTF-8 script
353             sub UTF2::escape_script {
354 308     308 0 551 my($script) = @_;
355 308         750 my $e_script = '';
356              
357             # read UTF-8 script
358 308         1308 my $fh = gensym();
359 308 50       1717 Eutf2::_open_r($fh, $script) or die __FILE__, ": Can't open file: $script\n";
360 308         7608 local $/ = undef; # slurp mode
361 308         3430 $_ = <$fh>;
362 308 50       1594 close($fh) or die "Can't close file: $script: $!";
363              
364 0 50       0 if (/^ use Eutf2(?:(?>\s+)(?>[0-9\.]*))?(?>\s*); $/oxms) {
365 308         1019 return $_;
366             }
367             else {
368              
369             # #! shebang line
370 0 50       0 if (s/\A(#!.+?\n)//oms) {
371 0         0 my $head = $1;
372 0         0 $head =~ s/\bjperl\b/perl/gi;
373 308         763 $e_script .= $head;
374             }
375              
376             # DOS-like system header
377 0 50       0 if (s/\A(\@rem(?>\s*)=(?>\s*)'.*?'(?>\s*);\s*\n)//oms) {
378 0         0 my $head = $1;
379 0         0 $head =~ s/\bjperl\b/perl/gi;
380 308         18395 $e_script .= $head;
381             }
382              
383             # P.618 Generating Perl in Other Languages
384             # in Chapter 24: Common Practices
385             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
386              
387             # P.717 Generating Perl in Other Languages
388             # in Chapter 21: Common Practices
389             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
390              
391 0 50       0 if (s/(.*^#(?>\s*)line(?>\s+)(?>[0-9]+)(?:(?>\s+)"(?:$q_char)+?")?\s*\n)//oms) {
392 0         0 my $head = $1;
393 0         0 $head =~ s/\bjperl\b/perl/gi;
394 308         1979 $e_script .= $head;
395             }
396              
397             # P.210 5.10.3.3. Match-time code evaluation
398             # in Chapter 5: Pattern Matching
399             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
400              
401             # P.255 Match-time code evaluation
402             # in Chapter 5: Pattern Matching
403             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
404              
405             # '...' quote to avoid "Octal number in vector unsupported" on perl 5.6
406              
407 308         3535 $e_script .= sprintf("use Eutf2 '%s.0'; # 'quote' for perl5.6\n", $UTF2::VERSION); # require run-time routines version
408              
409             # use UTF2 version qw(ord reverse getc);
410 308 50       871 if (s/^ (?>\s*) use (?>\s+) (?: Char | UTF2 ) (?>\s*) ([^\x80-\xFF;]*) ; \s* \n? $//oxms) {
411              
412             # require version
413 308         1103 my $list = $1;
414 0 50       0 if ($list =~ s/\A ((?>[0-9]+)\.(?>[0-9]+)) \.0 (?>\s*) //oxms) {
    50          
415 0         0 my $version = $1;
416 0 0       0 if ($version ne $UTF2::VERSION) {
417 0         0 my @file = grep -e, map {qq{$_/UTF2.pm}} @INC;
  0         0  
418 0         0 my %file = map { $_ => 1 } @file;
  0         0  
419 0 0       0 if (scalar(keys %file) >= 2) {
420 0         0 my $file = join "\n", sort keys %file;
421 0         0 warn <<END;
422             ****************************************************
423             C A U T I O N
424              
425             CONFLICT UTF2.pm FILE
426              
427             $file
428             ****************************************************
429              
430             END
431             }
432 0         0 die "Script $0 expects UTF2.pm $version, but @{[__FILE__]} is version $UTF2::VERSION\n";
  0         0  
433             }
434 0         0 $e_script .= qq{die "Script \$0 expects Eutf2.pm $version, but \\\$Eutf2::VERSION is \$Eutf2::VERSION" if \$Eutf2::VERSION ne '$version';\n};
435             }
436             elsif ($list =~ s/\A ((?>[0-9]+)(?>\.[0-9]*)) (?>\s*) //oxms) {
437 0         0 my $version = $1;
438 0 0       0 if ($version > $UTF2::VERSION) {
439 0         0 die "Script $0 required UTF2.pm $version, but @{[__FILE__]} is only version $UTF2::VERSION\n";
  308         1470  
440             }
441             }
442              
443             # demand ord, reverse, and getc
444 0 50       0 if ($list !~ /\A (?>\s*) \z/oxms) {
445 0         0 local $@;
446 0         0 my @list = CORE::eval $list;
447 0         0 for (@list) {
448 0 0       0 $Eutf2::function_ord = 'UTF2::ord' if /\A ord \z/oxms;
449 0 0       0 $Eutf2::function_ord_ = 'UTF2::ord_' if /\A ord \z/oxms;
450 0 0       0 $Eutf2::function_reverse = 'UTF2::reverse' if /\A reverse \z/oxms;
451 0 0       0 $Eutf2::function_getc = 'UTF2::getc' if /\A getc \z/oxms;
452              
453             # avoid: used only once: possible typo at ...
454 0         0 $Eutf2::function_ord = $Eutf2::function_ord;
455 0         0 $Eutf2::function_ord_ = $Eutf2::function_ord_;
456 0         0 $Eutf2::function_reverse = $Eutf2::function_reverse;
457 308         1157 $Eutf2::function_getc = $Eutf2::function_getc;
458             }
459             }
460             }
461             }
462              
463 308         3138 $e_script .= UTF2::escape();
464              
465             return $e_script;
466             }
467              
468             1;
469              
470             __END__