File Coverage

blib/lib/File/Find/utf8.pm
Criterion Covered Total %
statement 86 88 97.7
branch 29 54 53.7
condition 13 32 40.6
subroutine 16 16 100.0
pod n/a
total 144 190 75.7


line stmt bran cond sub pod time code
1             package File::Find::utf8;
2 4     4   95476 use strict;
  4         10  
  4         132  
3 4     4   24 use warnings;
  4         9  
  4         103  
4 4     4   92 use 5.010; # state
  4         16  
5              
6             # ABSTRACT: Fully UTF-8 aware File::Find
7             our $VERSION = '0.013'; # VERSION
8              
9             #pod =begin :prelude
10             #pod
11             #pod =for test_synopsis
12             #pod my @directories_to_search;
13             #pod
14             #pod =end :prelude
15             #pod
16             #pod =head1 SYNOPSIS
17             #pod
18             #pod # Use the utf-8 versions of find and finddepth
19             #pod use File::Find::utf8;
20             #pod find(\&wanted, @directories_to_search);
21             #pod
22             #pod # Revert back to non-utf-8 versions
23             #pod no File::Find::utf8;
24             #pod finddepth(\&wanted, @directories_to_search);
25             #pod
26             #pod # Export only the find function
27             #pod use File::Find::utf8 qw(find);
28             #pod find(\&wanted, @directories_to_search);
29             #pod
30             #pod # Export no functions
31             #pod use File::Find::utf8 qw(:none); # NOT "use File::Find::utf8 qw();"!
32             #pod File::Find::find(\&wanted, @directories_to_search);
33             #pod
34             #pod =head1 DESCRIPTION
35             #pod
36             #pod While the original L functions are capable of handling
37             #pod UTF-8 quite well, they expect and return all data as bytes, not as
38             #pod characters.
39             #pod
40             #pod This module replaces the L functions with fully UTF-8
41             #pod aware versions, both expecting and returning characters.
42             #pod
43             #pod B Replacement of functions is not done on DOS, Windows, and OS/2
44             #pod as these systems do not have full UTF-8 file system support.
45             #pod
46             #pod =head2 Behaviour
47             #pod
48             #pod The module behaves as a pragma so you can use both C
49             #pod File::Find::utf8> and C to turn utf-8 support on
50             #pod or off.
51             #pod
52             #pod By default, both find() and finddepth() are exported (as with the original
53             #pod L), if you want to prevent this, use C
54             #pod qw(:none)>. (As all the magic happens in the module's import function,
55             #pod you can not simply use C)
56             #pod
57             #pod L warning levels are properly propagated. Note though that
58             #pod for propagation of fatal L warnings, Perl 5.12 or higher
59             #pod is required (or the appropriate version of L).
60             #pod
61             #pod =head1 COMPATIBILITY
62             #pod
63             #pod The filesystems of Dos, Windows, and OS/2 do not (fully) support
64             #pod UTF-8. The L function will therefore not be replaced on these
65             #pod systems.
66             #pod
67             #pod =head1 SEE ALSO
68             #pod
69             #pod =for :list
70             #pod * L -- The original module.
71             #pod * L -- Fully utf-8 aware version of the L functions.
72             #pod * L -- Turn on utf-8, all of it.
73             #pod This was also the module I first added the utf-8 aware versions of
74             #pod L and L to before moving them to their own package.
75             #pod
76             #pod =cut
77              
78 4     4   24 use File::Find ();
  4         10  
  4         66  
79 4     4   892 use Encode ();
  4         15948  
  4         655  
80              
81             # Holds the pointers to the original version of redefined functions
82             state %_orig_functions;
83              
84             # Current (i.e., this) package
85             my $current_package = __PACKAGE__;
86              
87             # Original package (i.e., the one for which this module is replacing the functions)
88             my $original_package = $current_package;
89             $original_package =~ s/::utf8$//;
90              
91             require Carp;
92             $Carp::Internal{$current_package}++; # To get warnings reported at correct caller level
93              
94             #pod =attr $File::Find::utf8::SPECIALVARS
95             #pod
96             #pod By default C only decodes the I
97             #pod L variables C<$_>, C<$File::Find::name>,
98             #pod C<$File::Find::dir>, and (when C or C is in
99             #pod effect) C<$File::Find::fullname> for use in the C,
100             #pod C, and C functions.
101             #pod
102             #pod If for any reason (e.g., compatibility with find.pl or find2perl) you
103             #pod also need the I variables C<$File::Find::topdir>,
104             #pod C<$File::Find::topdev>, C<$File::Find::topino>,
105             #pod C<$File::Find::topmode>, and C<$File::Find::topnlink> to be decoded,
106             #pod specify C in your
107             #pod code. The extra decoding that needs to happen will impact performance
108             #pod though, so use only when absolutely necessary.
109             #pod
110             #pod =cut
111              
112             our $SPECIALVARS = 0;
113              
114             #pod =attr $File::Find::utf8::UTF8_CHECK
115             #pod
116             #pod By default C marks decoding errors as fatal (default value
117             #pod for this setting is C). If you want, you can change this by
118             #pod setting C. The value C reports
119             #pod the encoding errors as warnings, and C will completely
120             #pod ignore them. Please see L for details. Note: C is
121             #pod I enforced.
122             #pod =cut
123              
124             our $UTF8_CHECK = Encode::FB_CROAK | Encode::LEAVE_SRC; # Die on encoding errors
125              
126             # UTF-8 Encoding object
127             my $_UTF8 = Encode::find_encoding('UTF-8');
128              
129             sub import {
130             # Target package (i.e., the one loading this module)
131 8     8   1155 my $target_package = caller;
132              
133             # If run on the dos/os2/windows platform, ignore overriding functions silently.
134             # These platforms do not have (proper) utf-8 file system suppport...
135 8 50       161 unless ($^O =~ /MSWin32|cygwin|dos|os2/) {
136 4     4   907 no strict qw(refs); ## no critic (TestingAndDebugging::ProhibitNoStrict)
  4         9  
  4         127  
137 4     4   18 no warnings qw(redefine);
  4         8  
  4         2348  
138              
139             # Redefine each of the functions to their UTF-8 equivalent
140 8         43 for my $f (@{$original_package . '::EXPORT'}, @{$original_package . '::EXPORT_OK'}) {
  8         32  
  8         35  
141             # If we already have the _orig_function, we have redefined the function
142             # in an earlier load of this module, so we need not do it again
143 16 100       47 unless ($_orig_functions{$f}) {
144 8         12 $_orig_functions{$f} = \&{$original_package . '::' . $f};
  8         33  
145 8         11 *{$original_package . '::' . $f} = \&{"_utf8_$f"};
  8         33  
  8         18  
146             }
147             }
148 8         33 $^H{$current_package} = 1; # Set compiler hint that we should use the utf-8 version
149             }
150              
151             # Determine symbols to export
152 8         14 shift; # First argument contains the package (that's us)
153 8 100       29 @_ = (':DEFAULT') if !@_; # If nothing provided, use default
154 8         16 @_ = grep { $_ ne ':none' } @_; # Strip :none tag
  8         25  
155              
156             # Use exporter to export
157 8         37 require Exporter;
158 8 100       646 Exporter::export_to_level($original_package, 1, $target_package, @_) if (@_);
159              
160 7         1412 return;
161             }
162              
163             sub unimport { ## no critic (Subroutines::ProhibitBuiltinHomonyms)
164 1     1   9 $^H{$current_package} = 0; # Set compiler hint that we should not use the utf-8 version
165 1         1081 return;
166             }
167              
168             sub _utf8_find {
169 15     15   10191 my $ref = shift; # This can be the wanted function or a find options hash
170             # Make argument always into the find's options hash
171 15 100       73 my %find_options_hash = ref($ref) eq "HASH" ? %$ref : (wanted => $ref);
172              
173             # Holds the (possibly encoded) arguments
174 15         45 my @args = @_;
175              
176             # Get the hint from the caller (one level deeper if called from finddepth)
177 15 100 50     86 my $hints = ((caller 1)[3]//'') ne 'File::Find::utf8::_utf8_finddepth' ? (caller 0)[10] : (caller 1)[10];
178 15 100       781 if ($hints->{$current_package}) {
179 12 100       51 $UTF8_CHECK |= Encode::LEAVE_SRC if $UTF8_CHECK; # Enforce LEAVE_SRC
180              
181             # Save original processors
182 12         22 my %org_proc;
183 12         28 for my $proc ("wanted", "preprocess", "postprocess") { $org_proc{$proc} = $find_options_hash{$proc}; }
  36         73  
184             my $follow_option = (exists $find_options_hash{follow} && $find_options_hash{follow})
185 12   33     75 || (exists $find_options_hash{follow_fast} && $find_options_hash{follow_fast});
186              
187             # Wrap processors to become utf8-aware
188 12 50 33     67 if (defined $org_proc{wanted} && ref $org_proc{wanted}) {
189             $find_options_hash{wanted} = sub {
190             # Decode the file variables so they become characters
191 15 50   15   356 local $_ = $_UTF8->decode($_, $UTF8_CHECK) if $_;
192 15 50       120 local $File::Find::name = $_UTF8->decode($File::Find::name, $UTF8_CHECK) if $File::Find::name;
193 15 50       102 local $File::Find::dir = $_UTF8->decode($File::Find::dir, $UTF8_CHECK) if $File::Find::dir;
194 15 0 33     79 local $File::Find::fullname = $_UTF8->decode($File::Find::fullname, $UTF8_CHECK) if $follow_option && $File::Find::fullname;
195             # These are only necessary for compatibility reasons (find.pl, find2perl).
196             # If you need them, set $File::Find::utf8::SPECIALVARS
197 15 0 33     32 local $File::Find::topdir = $_UTF8->decode($File::Find::topdir, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topdir;
198 15 0 33     31 local $File::Find::topdev = $_UTF8->decode($File::Find::topdev, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topdev;
199 15 0 33     37 local $File::Find::topino = $_UTF8->decode($File::Find::topino, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topino;
200 15 0 33     32 local $File::Find::topmode = $_UTF8->decode($File::Find::topmode, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topmode;
201 15 0 33     32 local $File::Find::topnlink = $_UTF8->decode($File::Find::topnlink, $UTF8_CHECK) if $SPECIALVARS && $File::Find::topnlink;
202 15 0       41 return $org_proc{wanted}->(map { $_ ? $_UTF8->decode($_, $UTF8_CHECK) : $_ } @_);
  0         0  
203 12         58 };
204             }
205 12         27 for my $proc ("preprocess", "postprocess") {
206 24 100 66     76 if (defined $org_proc{$proc} && ref $org_proc{$proc}) {
207             $find_options_hash{$proc} = sub {
208             # Decode the file variables so they become characters
209 3 50   3   64 local $File::Find::dir = $_UTF8->decode($File::Find::dir, $UTF8_CHECK) if $File::Find::dir;
210             # Decode the arguments and encode the results
211 3 0       28 return map { $_ ? $_UTF8->encode($_, $UTF8_CHECK) : $_ } $org_proc{$proc}->(map { $_ ? $_UTF8->decode($_, $UTF8_CHECK) : $_ } @_);
  0 50       0  
  69         375  
212 3         10 };
213             }
214             }
215             # Encode arguments as utf-8 so that the original File::Find receives bytes
216 12 50       27 @args = map { $_ ? $_UTF8->encode($_, $UTF8_CHECK) : $_ } @_;
  12         117  
217             }
218              
219             # Make sure warning level propagates to File::Find
220             # Note: on perl prior to v5.12 warnings_fatal_enabled does not exist
221             # so we can not use it.
222 14 100 66     629 if (!warnings::enabled('File::Find')) {
    100          
223 4     4   27 no warnings 'File::Find';
  4         8  
  4         241  
224 3         877 return $_orig_functions{find}->(\%find_options_hash, @args);
225             } elsif (!exists &warnings::fatal_enabled or !warnings::fatal_enabled('File::Find')) {
226 4     4   20 use warnings 'File::Find';
  4         11  
  4         213  
227 10         2214 return $_orig_functions{find}->(\%find_options_hash, @args);
228             } else {
229 4     4   47 use warnings FATAL => qw(File::Find);
  4         7  
  4         492  
230 1         354 return $_orig_functions{find}->(\%find_options_hash, @args);
231             }
232             }
233              
234             sub _utf8_finddepth {
235 4     4   4231 my $ref = shift; # This can be the wanted function or a find options hash
236 4 50       27 return _utf8_find( { bydepth => 1, ref($ref) eq "HASH" ? %$ref : (wanted => $ref) }, @_);
237             }
238              
239             1;
240              
241             __END__