File Coverage

blib/lib/File/Find/utf8.pm
Criterion Covered Total %
statement 87 89 97.7
branch 29 54 53.7
condition 13 32 40.6
subroutine 16 16 100.0
pod n/a
total 145 191 75.9


line stmt bran cond sub pod time code
1             package File::Find::utf8;
2 4     4   254667 use strict;
  4         39  
  4         99  
3 4     4   18 use warnings;
  4         5  
  4         84  
4 4     4   55 use 5.010; # state
  4         12  
5              
6             # ABSTRACT: Fully UTF-8 aware File::Find
7             our $VERSION = '0.014'; # VERSION
8              
9             #pod =begin :prelude
10             #pod
11             #pod =for test_synopsis
12             #pod my @directories_to_search;
13             #pod
14             #pod =end :prelude
15             #pod
16             #pod =head1 SYNOPSIS
17             #pod
18             #pod # Use the utf-8 versions of find and finddepth
19             #pod use File::Find::utf8;
20             #pod find(\&wanted, @directories_to_search);
21             #pod
22             #pod # Revert back to non-utf-8 versions
23             #pod no File::Find::utf8;
24             #pod finddepth(\&wanted, @directories_to_search);
25             #pod
26             #pod # Export only the find function
27             #pod use File::Find::utf8 qw(find);
28             #pod find(\&wanted, @directories_to_search);
29             #pod
30             #pod # Export no functions
31             #pod use File::Find::utf8 qw(:none); # NOT "use File::Find::utf8 qw();"!
32             #pod File::Find::find(\&wanted, @directories_to_search);
33             #pod
34             #pod =head1 DESCRIPTION
35             #pod
36             #pod While the original L functions are capable of handling
37             #pod UTF-8 quite well, they expect and return all data as bytes, not as
38             #pod characters.
39             #pod
40             #pod This module replaces the L functions with fully UTF-8
41             #pod aware versions, both expecting and returning characters.
42             #pod
43             #pod B Replacement of functions is not done on DOS, Windows, and OS/2
44             #pod as these systems do not have full UTF-8 file system support.
45             #pod
46             #pod =head2 Behaviour
47             #pod
48             #pod The module behaves as a pragma so you can use both C
49             #pod File::Find::utf8> and C to turn utf-8 support on
50             #pod or off.
51             #pod
52             #pod By default, both find() and finddepth() are exported (as with the original
53             #pod L), if you want to prevent this, use C
54             #pod qw(:none)>. (As all the magic happens in the module's import function,
55             #pod you can not simply use C)
56             #pod
57             #pod L warning levels are properly propagated. Note though that
58             #pod for propagation of fatal L warnings, Perl 5.12 or higher
59             #pod is required (or the appropriate version of L).
60             #pod
61             #pod =head1 COMPATIBILITY
62             #pod
63             #pod The filesystems of Dos, Windows, and OS/2 do not (fully) support
64             #pod UTF-8. The L function will therefore not be replaced on these
65             #pod systems.
66             #pod
67             #pod =head1 SEE ALSO
68             #pod
69             #pod =for :list
70             #pod * L -- The original module.
71             #pod * L -- Fully utf-8 aware version of the L functions.
72             #pod * L -- Turn on utf-8, all of it.
73             #pod This was also the module I first added the utf-8 aware versions of
74             #pod L and L to before moving them to their own package.
75             #pod
76             #pod =cut
77              
78 4     4   23 use File::Find ();
  4         10  
  4         52  
79 4     4   923 use Encode ();
  4         17326  
  4         569  
80              
81             # Holds the pointers to the original version of redefined functions
82             state %_orig_functions;
83              
84             # Current (i.e., this) package
85             my $current_package = __PACKAGE__;
86              
87             # Original package (i.e., the one for which this module is replacing the functions)
88             my $original_package = $current_package;
89             $original_package =~ s/::utf8$//;
90              
91             require Carp;
92             $Carp::Internal{$current_package}++; # To get warnings reported at correct caller level
93              
94             #pod =attr $File::Find::utf8::SPECIALVARS
95             #pod
96             #pod By default C only decodes the I
97             #pod L variables C<$_>, C<$File::Find::name>,
98             #pod C<$File::Find::dir>, and (when C or C is in
99             #pod effect) C<$File::Find::fullname> for use in the C,
100             #pod C, and C functions.
101             #pod
102             #pod If for any reason (e.g., compatibility with find.pl or find2perl) you
103             #pod also need the I variables C<$File::Find::topdir>,
104             #pod C<$File::Find::topdev>, C<$File::Find::topino>,
105             #pod C<$File::Find::topmode>, and C<$File::Find::topnlink> to be decoded,
106             #pod specify C in your
107             #pod code. The extra decoding that needs to happen will impact performance
108             #pod though, so use only when absolutely necessary.
109             #pod
110             #pod =cut
111              
112             our $SPECIALVARS = 0;
113              
114             #pod =attr $File::Find::utf8::UTF8_CHECK
115             #pod
116             #pod By default C marks decoding errors as fatal (default value
117             #pod for this setting is C). If you want, you can change this by
118             #pod setting C. The value C reports
119             #pod the encoding errors as warnings, and C will completely
120             #pod ignore them. Please see L for details. Note: C is
121             #pod I enforced.
122             #pod =cut
123              
124             our $UTF8_CHECK = Encode::FB_CROAK | Encode::LEAVE_SRC; # Die on encoding errors
125              
126             # UTF-8 Encoding object
127             my $_UTF8 = Encode::find_encoding('UTF-8');
128              
129             sub import {
130             # If run on the dos/os2/windows platform, ignore overriding functions silently.
131             # These platforms do not have (proper) utf-8 file system suppport...
132 8 50   8   401 unless ($^O =~ /MSWin32|cygwin|dos|os2/) {
133 4     4   34 no strict qw(refs); ## no critic (TestingAndDebugging::ProhibitNoStrict)
  4         6  
  4         121  
134 4     4   18 no warnings qw(redefine);
  4         7  
  4         2749  
135              
136             # Redefine each of the functions to their UTF-8 equivalent
137 8         22 for my $f (@{$original_package . '::EXPORT'}, @{$original_package . '::EXPORT_OK'}) {
  8         30  
  8         29  
138             # If we already have the _orig_function, we have redefined the function
139             # in an earlier load of this module, so we need not do it again
140 16 100       47 unless ($_orig_functions{$f}) {
141 8         10 $_orig_functions{$f} = \&{$original_package . '::' . $f};
  8         45  
142 8         13 *{$original_package . '::' . $f} = \&{"_utf8_$f"};
  8         40  
  8         17  
143             }
144             }
145 8         36 $^H{$current_package} = 1; # Set compiler hint that we should use the utf-8 version
146             }
147              
148             # Determine symbols to export
149 8         12 shift; # First argument contains the package (that's us)
150 8 100       22 @_ = (':DEFAULT') if !@_; # If nothing provided, use default
151 8         15 @_ = grep { $_ ne ':none' } @_; # Strip :none tag
  8         24  
152              
153             # Use exporter to export
154 8         31 require Exporter;
155 8 100       21 if (@_) {
156 7         15 @_ = ($original_package, @_);
157 7         2062 goto &Exporter::import;
158             }
159              
160 1         74 return;
161             }
162              
163             sub unimport { ## no critic (Subroutines::ProhibitBuiltinHomonyms)
164 1     1   10 $^H{$current_package} = 0; # Set compiler hint that we should not use the utf-8 version
165 1         1302 return;
166             }
167              
168             sub _utf8_find {
169 15     15   9856 my $ref = shift; # This can be the wanted function or a find options hash
170             # Make argument always into the find's options hash
171 15 100       65 my %find_options_hash = ref($ref) eq "HASH" ? %$ref : (wanted => $ref);
172              
173             # Holds the (possibly encoded) arguments
174 15         32 my @args = @_;
175              
176             # Get the hint from the caller (one level deeper if called from finddepth)
177 15 100 50     70 my $hints = ((caller 1)[3]//'') ne 'File::Find::utf8::_utf8_finddepth' ? (caller 0)[10] : (caller 1)[10];
178 15 100       646 if ($hints->{$current_package}) {
179 12 100       38 $UTF8_CHECK |= Encode::LEAVE_SRC if $UTF8_CHECK; # Enforce LEAVE_SRC
180              
181             # Save original processors
182 12         16 my %org_proc;
183 12         23 for my $proc ("wanted", "preprocess", "postprocess") { $org_proc{$proc} = $find_options_hash{$proc}; }
  36         60  
184             my $follow_option = (exists $find_options_hash{follow} && $find_options_hash{follow})
185 12   33     66 || (exists $find_options_hash{follow_fast} && $find_options_hash{follow_fast});
186              
187             # Wrap processors to become utf8-aware
188 12 50 33     43 if (defined $org_proc{wanted} && ref $org_proc{wanted}) {
189             $find_options_hash{wanted} = sub {
190             # Decode the file variables so they become characters
191 15 50   15   450 local $_ = $_UTF8->decode($_, $UTF8_CHECK) if $_;
192 15 50       106 local $File::Find::name = $_UTF8->decode($File::Find::name, $UTF8_CHECK) if $File::Find::name;
193 15 50       74 local $File::Find::dir = $_UTF8->decode($File::Find::dir, $UTF8_CHECK) if $File::Find::dir;
194 15 0 33     57 local $File::Find::fullname = $_UTF8->decode($File::Find::fullname, $UTF8_CHECK) if $follow_option && $File::Find::fullname;
195             # These are only necessary for compatibility reasons (find.pl, find2perl).
196             # If you need them, set $File::Find::utf8::SPECIALVARS
197 15 0 33     24 local $File::Find::topdir = $_UTF8->decode($File::Find::topdir, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topdir;
198 15 0 33     22 local $File::Find::topdev = $_UTF8->decode($File::Find::topdev, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topdev;
199 15 0 33     30 local $File::Find::topino = $_UTF8->decode($File::Find::topino, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topino;
200 15 0 33     26 local $File::Find::topmode = $_UTF8->decode($File::Find::topmode, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topmode;
201 15 0 33     22 local $File::Find::topnlink = $_UTF8->decode($File::Find::topnlink, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topnlink;
202 15 0       40 return $org_proc{wanted}->(map { $_ ? $_UTF8->decode($_, $UTF8_CHECK) : $_ } @_);
  0         0  
203 12         58 };
204             }
205 12         25 for my $proc ("preprocess", "postprocess") {
206 24 100 66     55 if (defined $org_proc{$proc} && ref $org_proc{$proc}) {
207             $find_options_hash{$proc} = sub {
208             # Decode the file variables so they become characters
209 3 50   3   86 local $File::Find::dir = $_UTF8->decode($File::Find::dir, $UTF8_CHECK) if $File::Find::dir;
210             # Decode the arguments and encode the results
211 3 0       23 return map { $_ ? $_UTF8->encode($_, $UTF8_CHECK) : $_ } $org_proc{$proc}->(map { $_ ? $_UTF8->decode($_, $UTF8_CHECK) : $_ } @_);
  0 50       0  
  72         310  
212 3         9 };
213             }
214             }
215             # Encode arguments as utf-8 so that the original File::Find receives bytes
216 12 50       24 @args = map { $_ ? $_UTF8->encode($_, $UTF8_CHECK) : $_ } @_;
  12         104  
217             }
218              
219             # Make sure warning level propagates to File::Find
220             # Note: on perl prior to v5.12 warnings_fatal_enabled does not exist
221             # so we can not use it.
222 14 100 66     607 if (!warnings::enabled('File::Find')) {
    100          
223 4     4   26 no warnings 'File::Find';
  4         8  
  4         265  
224 3         747 return $_orig_functions{find}->(\%find_options_hash, @args);
225             } elsif (!exists &warnings::fatal_enabled or !warnings::fatal_enabled('File::Find')) {
226 4     4   22 use warnings 'File::Find';
  4         8  
  4         212  
227 10         2153 return $_orig_functions{find}->(\%find_options_hash, @args);
228             } else {
229 4     4   32 use warnings FATAL => qw(File::Find);
  4         7  
  4         580  
230 1         314 return $_orig_functions{find}->(\%find_options_hash, @args);
231             }
232             }
233              
234             sub _utf8_finddepth {
235 4     4   4681 my $ref = shift; # This can be the wanted function or a find options hash
236 4 50       24 return _utf8_find( { bydepth => 1, ref($ref) eq "HASH" ? %$ref : (wanted => $ref) }, @_);
237             }
238              
239             1;
240              
241             __END__