File Coverage

blib/lib/String/MFN.pm
Criterion Covered Total %
statement 32 32 100.0
branch 3 4 75.0
condition n/a
subroutine 5 5 100.0
pod 1 1 100.0
total 41 42 97.6


line stmt bran cond sub pod time code
1             package String::MFN;
2              
3             require 5.008;
4 2     2   73479 use warnings;
  2         6  
  2         61  
5 2     2   11 use strict;
  2         5  
  2         70  
6              
7 2     2   2098 use Encode;
  2         28986  
  2         427  
8              
9             require Exporter;
10             our @ISA = qw( Exporter );
11             our @EXPORT = qw( &mfn );
12              
13             =head1 NAME
14              
15             String::MFN - Normalize a string to produce a sane Unix filename
16              
17             =head1 VERSION
18              
19             Version 1.29
20              
21             =cut
22              
23             our $VERSION = '1.29';
24              
25             =head1 SYNOPSIS
26              
27             use String::MFN;
28              
29             my $clean = mfn($dirty);
30              
31             =head1 DESCRIPTION
32              
33             String::MFN exports a single function, C, which modifies a
34             string to resemble a sane Unix filename.
35              
36             In a nutshell, this means lowercasing everything and either getting
37             rid of "funny" characters or replacing them with sane equivalents
38             which allow the string to maintain some structure. See the test suite
39             for a battery of examples.
40              
41             =head1 FUNCTIONS
42              
43             =head2 mfn
44              
45             Normalizes a string. Returns the normalized string. If no argument is
46             given, C<$_> is used.
47              
48             =cut
49              
50             sub mfn {
51 67 50   67 1 193 my $string = ( @_ ? $_[0] : $_ );
52 67         138 Encode::_utf8_on($string);
53              
54             # phase 1 - sanitize
55 2     2   2099 $string =~ s/(\p{Lowercase})(\p{Uppercase})/$1_$2/g; # inCap to in_Cap
  2         20  
  2         27  
  67         334  
56 67         154 $string =~ s/[\{\[\(\<>)\]\}~\|\/]/-/g; # {[(<>)]}~|/ to '-'
57 67         195 $string =~ s/[\p{Zs}\t]+/_/g; # whitespace to '_'
58 67         111 $string =~ s/\&+/_and_/g; # '&' to "_and_"
59 67         162 $string =~ s/[^\p{Alphabetic}\p{Nd}\-\.\+_]//g; # drop not-word chars
60              
61             # phase 2 - condense
62 67         142 $string =~ s/_+-+/-/g; # collapse _- sequences
63 67         115 $string =~ s/-+_+/-/g; # collapse -_ sequences
64 67         133 $string =~ s/[\-\_]+\././g; # collapse [-_]. sequences
65 67         190 $string =~ s/\.[\-\_]+/./g; # collapse .[-_] sequences
66 67         133 $string =~ s/\-{2,}/-/g; # collapse repeating -,
67 67         94 $string =~ s/\_{2,}/_/g; # _,
68 67         149 $string =~ s/\.{2,}/./g; # and .
69 67         151 $string =~ s/^(\-|\_|\.)+//; # remove leading -_.
70 67         303 $string =~ s/(\-|\_|\.)+$//; # remove trailing -_. (rare)
71 67 100       226 if ($string =~ /\.(\w+?)$/) { # collapse repeating extensions
72 20         37 my $ext = $1;
73 20         326 $string =~ s/(\.$ext)+$/\.$ext/;
74             }
75              
76 67         341 return lc($string); # slam lowercase
77             }
78              
79             =head1 TODO
80              
81             =over
82              
83             =item *
84              
85             Add "classic" ASCII-oriented function for extra strictness
86              
87             =item *
88              
89             Add track/sequence number stuff to mfn(1p)
90              
91             =back
92              
93             =head1 BUGS
94              
95             =over
96              
97             =item *
98              
99             C forces Perl's C<_is_utf8> flag on, but does not attempt to
100             verify that the data being passed to it is valid UTF-8.
101              
102             =back
103              
104             Please report any bugs or feature requests to
105             C, or through the web interface at
106             L. I will be notified, and then you'll automatically
107             be notified of progress on your bug as I make changes.
108              
109             =head1 AUTHOR
110              
111             Shawn Boyette, C<< >>
112              
113             =head1 COPYRIGHT & LICENSE
114              
115             Copyright 2003-2007 Shawn Boyette, All Rights Reserved.
116              
117             This program is free software; you can redistribute it and/or modify it
118             under the same terms as Perl itself.
119              
120             =cut
121              
122             1; # End of String::MFN