File Coverage

blib/lib/String/Clean.pm
Criterion Covered Total %
statement 71 72 98.6
branch 21 26 80.7
condition 12 18 66.6
subroutine 14 14 100.0
pod 6 6 100.0
total 124 136 91.1


line stmt bran cond sub pod time code
1             package String::Clean;
2             BEGIN {
3 3     3   115906 $String::Clean::VERSION = '0.031';
4             }
5              
6 3     3   27 use warnings;
  3         6  
  3         173  
7 3     3   21 use strict;
  3         7  
  3         250  
8 3     3   5482 use Carp::Assert::More;
  3         29979  
  3         4979  
9              
10             # ABSTRACT: use data objects to clean strings
11              
12             =head1 SYNOPSIS
13              
14             The goal of this module is to assist in the drudgery of string cleaning by
15             allowing data objects to define what and how to clean.
16              
17             =head2 EXAMPLES
18              
19             use String::Clean;
20              
21             my $clean = String::Clean->new();
22              
23             $clean->replace( { this => 'that', is => 'was' } , 'this is a test' );
24             # returns 'that was a test'
25            
26             # see the tests for more examples
27              
28             =head1 THE OPTIONS HASH
29              
30             Each function can take an optonal hash that will change it's behaviour. This
31             hash can be passed to new and will change the defaults, or you can pass to each
32             call as needed.
33              
34             opt:
35             Any regex options that you want to pass, ie {opt => 'i'} will allow
36             for case insensitive manipulation.
37             replace :
38             If the value is set to 'word' then the replace function will look for
39             words instead of just a collection of charicters.
40             example:
41              
42             replace( { is => 'was' },
43             'this is a test',
44             );
45              
46             returns 'thwas was a test', where
47              
48             replace( { is => 'was' },
49             'this is a test',
50             { replace => 'word' },
51             );
52              
53             will return 'this was a test'
54              
55             strip :
56             Just like replace, if the value is set to 'word' then strip will look
57             for words instead of just a collection of charicters.
58              
59             word_ boundary :
60             Hook to change what String::Clean will use as the word boundry, by
61             default it will use '\b'. Mainly this would allow String::Clean to
62             deal with strings like 'this,is,a,test'.
63              
64             escape :
65             If this is set to 'no' then String::Clean will not try to escape any
66             of the things that you've asked it to look for.
67              
68             You can also override options at the function level again, but this happens as
69             merged hash, for example:
70              
71             my $clean = String::Clean->new({replace => 'word', opt => 'i'});
72             $clean->strip( [qw{a}], 'an Array', {replace =>'non-word'} );
73             #returns 'n rray' because opt => 'i' was pulled in from the options at new.
74            
75              
76             =head1 CORE FUNCTIONS
77              
78             =head2 new
79              
80             The only thing exciting here is that you can pass the same options hash at
81             construction, and this will cascade down to each function call.
82              
83             =cut
84              
85             #---------------------------------------------------------------------------
86             # NEW
87             #---------------------------------------------------------------------------
88             sub new {
89 5     5 1 27 my ( $class, $opt ) = @_;
90 5         289 my $self = {};
91 5 100       24 if ( ref($opt) eq 'HASH' ) {
92 1         3 $self->{opt} = $opt;
93 1         6 $self->{yaml}= {};
94             }
95 5         30 return bless $self, $class;
96             }
97              
98             #---------------------------------------------------------------------------
99             # REPLACE
100             #---------------------------------------------------------------------------
101              
102             =head2 replace
103              
104             Takes a hash where the key is what to look for and the value is what to replace
105             the key with.
106              
107             replace( $hash, $string, $opts );
108              
109             =cut
110              
111             sub replace {
112 10     10 1 641 my ( $self, $hash, $string , $opt) = @_;
113 10         37 assert_hashref($hash);
114 10         173 assert_defined($string);
115 10         51 $opt = $self->_check_for_opt($opt);
116 10         42 my $o = _build_opt( $opt->{opt} );
117 10         41 my $b = _boundary( $opt->{word_boundary} );
118              
119 10         43 foreach my $key ( keys(%$hash) ) {
120 15 50 33     72 my $qmkey = quotemeta($key) unless ( defined($opt->{escape}) && $opt->{escape} =~ m/^no$/ );
121 15 50       34 next unless defined $qmkey;
122            
123 15 100 66     82 if ( defined($opt->{replace})
124             && $opt->{replace} =~ m/^word$/i
125             ) {
126 11         432 $string =~ s/(^|$b)$qmkey($b|$)/$1$hash->{$key}$2/g;
127             }
128             else {
129 4         153 $string =~ s/(?$o)$qmkey/$hash->{$key}/g;
130             }
131             }
132 10         60 return $string;
133             }
134              
135             =head2 replace_word
136              
137             A shortcut that does the same thing as passing {replace => 'word'} to replace.
138              
139             replace_word( $hash, $string, $opts );
140              
141             =cut
142              
143             sub replace_word {
144 1     1 1 3 my ( $self, $hash, $string , $opt) = @_;
145 1         3 $opt->{replace} = 'word';
146 1         3 return $self->replace($hash, $string, $opt);
147             }
148              
149              
150             #---------------------------------------------------------------------------
151             # STRIP
152             #---------------------------------------------------------------------------
153              
154             =head2 strip
155              
156             Takes an arrayref of items to completely remove from the string.
157              
158             strip( $list, $sring, $opt);
159              
160             =cut
161              
162             sub strip {
163 8     8 1 22 my ( $self, $list, $string , $opt) = @_;
164 8         33 assert_listref($list);
165 8         2152 assert_defined($string);
166 8         39 $opt = $self->_check_for_opt($opt);
167 8         36 my $o = _build_opt($opt->{opt});
168 8         33 my $b = _boundary( $opt->{word_boundary} );
169 8 50 33     42 $list = [map{ quotemeta } @$list ] unless ( defined($opt->{escape}) && $opt->{escape} =~ m/^no$/ );
  12         43  
170 3         30 my $s = ( defined($opt->{strip})
171             && $opt->{strip} =~ m/^word$/i
172             )
173 8 100 66     52 ? join '|', map{ $b.$_.$b } @$list
174             : join '|', @$list
175             ;
176 8         195 $string =~ s/(?$o)(?:$s)//g;
177 8         49 return $string;
178             }
179              
180             =head2 strip_word
181              
182             A shortcut that does the same thing as passing {strip => 'word'} to strip.
183              
184             strip_word( $list, $string, $opt);
185              
186             =cut
187              
188             sub strip_word {
189 1     1 1 2 my ( $self, $list, $string , $opt) = @_;
190 1         4 $opt->{strip} = 'word';
191 1         5 return $self->strip($list, $string, $opt);
192             }
193              
194              
195             #---------------------------------------------------------------------------
196             # CLEAN BY YAML
197             #---------------------------------------------------------------------------
198              
199             =head1 WRAPPING THINGS UP AND USING YAML
200              
201             =head2 clean_by_yaml
202              
203             Because we have to basic functions that take two seperate data types... why
204             not wrap those up, enter YAML.
205              
206             clean_by_yaml( $yaml, $string, $opt );
207              
208             But how do we do that? Heres an example:
209              
210             =head3 OLD CODE
211              
212             $string = 'this is still just a example for the YAML stuff';
213             $string =~ s/this/that/;
214             $string =~ s/is/was/;
215             $string =~ s/\ba\b/an/;
216             $string =~ s/still//;
217             $string =~ s/for/to explain/;
218             $string =~ s/\s\s/ /g;
219             # 'that was just an example to explain the YAML stuff'
220              
221             =head3 NEW CODE
222              
223             $string = 'this is still just a example for the YAML stuff';
224             $yaml = q{
225             ---
226             this : that
227             is : was
228             a : an
229             ---
230             - still
231             ---
232             for : to explain
233             ' ': ' '
234             };
235             $string = $clean->clean_by_yaml( $yaml, $string, { replace => 'word' } );
236             # 'that was just an example to explain the YAML stuff'
237              
238             =head3 ISSUES TO WATCH FOR:
239              
240             =over
241              
242             =item * Order matters:
243              
244             As you can see in the example we have 3 seperate YAML docs, this allows for
245             replaces to be doene in a specific sequence, if that is needed. Here in this
246             example is would not have mattered that much, here's a better example:
247              
248             #swap all instances of 'ctrl' and 'alt'
249             $yaml = q{
250             ---
251             ctrl : __was_ctrl__
252             ---
253             alt : ctrl
254             ---
255             __was_ctrl__ : alt
256             };
257              
258             =item * Options are global to the YAML doc :
259            
260             If you need to have seperate options applied to seperate sets then they
261             will have to happen as seprate calls.
262              
263             =back
264              
265             =cut
266              
267             sub clean_by_yaml {
268 3     3   5603 use YAML::Any;
  3         5953  
  3         22  
269 4     4 1 28 my ( $self, $yaml, $string, $opt) = @_;
270 4         14 assert_defined($yaml);
271 4         19 assert_defined($string);
272             #$opt = $self->_check_for_opt($opt);
273 4 50       41 $self->{yaml}->{$yaml} = [Load($yaml)]
274             unless defined $self->{yaml}->{$yaml};
275 4         81439 $opt = $self->_check_for_opt($opt);
276 4         8 foreach my $doc (@{ $self->{yaml}->{$yaml} }) {
  4         17  
277 6 100       29 if ( ref($doc) eq 'ARRAY' ) {
    50          
278 3         24 $string = $self->strip( $doc, $string, $opt);
279             }
280             elsif ( ref($doc) eq 'HASH' ) {
281 3         15 $string = $self->replace( $doc, $string , $opt);
282             }
283             else {
284 0         0 warn '!!! FAILURE !!! unknown type of data struct for $data. Skipping and moveing on.';
285             }
286              
287             }
288 4         120 return $string;
289             }
290              
291             #---------------------------------------------------------------------------
292             # Helper function that do not get exported and should only be run localy
293             #---------------------------------------------------------------------------
294              
295             sub _build_opt {
296 18     18   37 my ($opt) = @_;
297 18 100       81 return ( defined( $opt ) ) ? $opt : '-i';
298             }
299              
300             sub _check_for_opt {
301 22     22   44 my ($self, $opt) = @_;
302 22 100 100     299 if (! defined($opt)
    100 100        
303             && defined($self->{opt})
304             ) {
305 2         7 return $self->{opt};
306             }
307             elsif ( defined($opt)
308             && defined($self->{opt})
309             ) {
310 1         3 return { %{$self->{opt}}, %$opt };
  1         77  
311             }
312             else {
313 19         42 return $opt;
314             }
315             }
316              
317              
318             sub _boundary {
319 18     18   34 my ( $b ) = @_;
320 18 100       51 return ( defined($b) ) ? $b : '\b';
321             }
322              
323              
324             =head1 AUTHOR
325              
326             ben hengst, C<< >>
327              
328             =head1 BUGS
329              
330             Please report any bugs or feature requests to C, or through
331             the web interface at L. I will be notified, and then you'll
332             automatically be notified of progress on your bug as I make changes.
333              
334              
335              
336              
337             =head1 SUPPORT
338              
339             You can find documentation for this module with the perldoc command.
340              
341             perldoc String::Clean
342              
343              
344             You can also look for information at:
345              
346             =over 4
347              
348             =item * RT: CPAN's request tracker
349              
350             L
351              
352             =item * AnnoCPAN: Annotated CPAN documentation
353              
354             L
355              
356             =item * CPAN Ratings
357              
358             L
359              
360             =item * Search CPAN
361              
362             L
363              
364             =back
365              
366              
367             =head1 ACKNOWLEDGEMENTS
368             Lindsey Kuper and Jeff Griffin for giving me a reason to cook up this scheme.
369              
370              
371             =head1 COPYRIGHT & LICENSE
372              
373             Copyright 2007 ben hengst, all rights reserved.
374              
375             This program is free software; you can redistribute it and/or modify it
376             under the same terms as Perl itself.
377              
378              
379             =cut
380              
381             1; # End of String::Clean