File Coverage

blib/lib/Encode/Safename.pm

Criterion	Covered	Total	%
statement	41	41	100.0
branch	4	6	66.6
condition	2	3	66.6
subroutine	10	10	100.0
pod	2	2	100.0
total	59	62	95.1

line	stmt	bran	cond	sub	pod	time	code
1							package Encode::Safename;
2
3	2			2		58194	use 5.006;
	2					9
	2					84
4	2			2		14	use strict;
	2					4
	2					72
5	2			2		11	use warnings FATAL => 'all';
	2					8
	2					88
6	2			2		976	use utf8;
	2					12
	2					12
7
8	2			2		1790	use Parse::Lex;
	2					59892
	2					98
9
10	2			2		29	use base qw(Encode::Encoding);
	2					5
	2					1192
11
12							use constant {
13	2					1843	COND_ENCODE => 'ENCODE',
14							COND_DECODE => 'DECODE',
15	2			2		13002	};
	2					5
16
17							__PACKAGE__->Define(qw(safename));
18
19							=head1 NAME
20
21							Encode::Safename - An encoding for safe filenames.
22
23							=head1 VERSION
24
25							Version 0.05
26
27							=cut
28
29							our $VERSION = '0.05';
30
31							=head1 SYNOPSIS
32
33							An encoding to encode filenames to safe filenames, that is filenames
34							that are valid on all filesystems.
35
36							use Encode qw(decode encode);
37							use Encode::Safename;
38
39							$encoded = encode('safename', 'Foo Bar Baz.txt');
40							# $encoded is now '{f}oo_{b}ar_{b}az.txt'
41							$decoded = decode('safename', $encoded);
42							# $decoded is now 'Foo Bar Baz.txt'
43
44							=head1 DESCRIPTION
45
46							A filename is encoded as follows:
47
48							=over 4
49
50							=item *
51
52							A range of uppercase characters is changed to lowercase characters,
53							and put between braces.
54
55							'F' -> '{F}'
56							'FOO' -> '{foo}'
57
58							=item *
59
60							A range of spaces is changed to underscores.
61
62							' ' -> '_'
63							' ' -> '___'
64
65							=item *
66
67							A range of safe characters (characters that are valid on all filesystems,
68							excluding braces, parentheses, and underscores) is left unchanged.
69
70							'f' -> 'f'
71							'foo' -> 'foo'
72
73							=item *
74
75							All other characters are changed to their Unicode codepoint in hexadecimal
76							notation, and put between parentheses.
77
78							':' -> '(3a)'
79							':?' -> '(3a)(3f)'
80
81							=back
82
83							Combined, this gives the following:
84
85							'FOO: Bar Baz.txt' -> '{foo}(3a)_{b}ar_{b}az.txt'
86
87							=head1 METHODS
88
89							=head2 _process LEXER, STRING
90
91							Applies LEXER to STRING. Returns both the processed and unprocessed
92							parts.
93
94							For internal use only!
95
96							=cut
97
98							Parse::Lex->inclusive('ENCODE', 'DECODE');
99							my $_lexer = Parse::Lex->new(
100							# uppercase characters
101							'ENCODE:E_UPPER' => (
102							'[A-Z]+',
103							sub {
104							return '{' . lc $_[1] . '}';
105							},
106							),
107							'DECODE:D_UPPER' => (
108							'\{[a-z]+\}',
109							sub {
110							my $text = $_[1];
111							$text =~ s/\{(.*)\}/$1/;
112							return uc $text;
113							},
114							),
115
116							# spaces
117							'ENCODE:E_SPACES' => (
118							' +',
119							sub {
120							my $text = $_[1];
121							$text =~ tr/ /_/;
122							return $text;
123							},
124							),
125							'DECODE:D_SPACES' => (
126							'_+',
127							sub {
128							my $text = $_[1];
129							$text =~ tr/_/ /;
130							return $text;
131							},
132							),
133
134							# safe characters
135							'SAFE' => '[a-z0-9\-+!\$%&\'@~#.,^]+',
136
137							# other characters
138							'ENCODE:E_OTHER' => (
139							'.',
140							sub {
141							return '(' . sprintf('%x', unpack('U', $_[1])) . ')';
142							},
143							),
144							'DECODE:D_OTHER' => (
145							'$[0-9a-f]+$',
146							sub {
147							my $text = $_[1];
148							$text =~ s/$(.*)$/$1/;
149							return pack('U', oct('0x' . $text));
150							},
151							),
152							);
153							$_lexer->skip('');
154
155							sub _process {
156							# process arguments
157	22			22		42	my ($self, $string, $condition) = @_;
158
159							# initialize the lexer and the processed buffer
160	22					92	$_lexer->from($string);
161	22					10450	$_lexer->start($condition);
162	22					220	my $processed = '';
163
164	22					31	while (1) {
165							# infinite loop!
166
167							# get the next token
168	72					413	my $token = $_lexer->next;
169
170	72	100	66			3175	if ($_lexer->eoi \|\| (! $token)) {
171							# no more tokens; jump out of the loop
172	22					139	last;
173							}
174							else {
175							# add the token's text to the processed buffer
176	50					454	$processed .= $token->text;
177							}
178							}
179
180							# return the both the processed and unprocessed parts
181	22					68	my $unprocessed = substr $string, $_lexer->offset;
182	22					165	$_lexer->start('INITIAL');
183	22					562	return ($processed, $unprocessed);
184							}
185
186							=head2 decode STRING, CHECK
187
188							Decoder for decoding safename. See module L.
189
190							=cut
191
192							sub decode {
193							# process arguments
194	11			11	1	6094	my ($self, $string, $check) = @_;
195
196							# apply the lexer for decoding to the string and return the result
197	11					31	my ($processed, $unprocessed) = $self->_process($string, COND_DECODE);
198	11	50				34	$_[1] = $unprocessed if $check;
199	11					36	return $processed;
200							}
201
202							=head2 encode STRING, CHECK
203
204							Encoder for encoding safename. See module L.
205
206							=cut
207
208							sub encode {
209							# process arguments
210	11			11	1	4736	my ($self, $string, $check) = @_;
211
212							# apply the lexer for encoding to the string and return the result
213	11					31	my ($processed, $unprocessed) = $self->_process($string, COND_ENCODE);
214	11	50				31	$_[1] = $unprocessed if $check;
215	11					35	return $processed;
216							}
217
218							=head1 AUTHOR
219
220							Bert Vanderbauwhede, C<< >>
221
222							=head1 BUGS
223
224							Please report any bugs or feature requests to C
225							at rt.cpan.org>, or through the web interface at
226							L.
227							I will be notified, and then you'll automatically be notified of progress
228							on your bug as I make changes.
229
230							=head1 SUPPORT
231
232							You can find documentation for this module with the perldoc command.
233
234							perldoc Encode::Safename
235
236							You can also look for information at:
237
238							=over 4
239
240							=item * RT: CPAN's request tracker (report bugs here)
241
242							L
243
244							=item * AnnoCPAN: Annotated CPAN documentation
245
246							L
247
248							=item * CPAN Ratings
249
250							L
251
252							=item * Search CPAN
253
254							L
255
256							=back
257
258							=head1 ACKNOWLEDGEMENTS
259
260							Based on the module safefilename from Torsten Bronger's Bobcat project
261							(L).
262
263							=head1 LICENSE AND COPYRIGHT
264
265							Copyright 2014 Bert Vanderbauwhede.
266
267							This program is free software; you can redistribute it and/or modify it
268							under the terms of the GNU Lesser General Public License as published
269							by the Free Software Foundation, either version 3 of the License, or
270							(at your option) any later version.
271
272							See L for more information.
273
274							=cut
275
276							1; # End of Encode::Safename