File Coverage

lib/Data/Filter.pm

Criterion	Covered	Total	%
statement	89	94	94.6
branch	13	18	72.2
condition	3	3	100.0
subroutine	28	29	96.5
pod	3	3	100.0
total	136	147	92.5

line	stmt	bran	cond	sub	pod	time	code
1							package Data::Filter;
2
3							# $Id: Filter.pm 20 2006-09-02 20:49:06Z matt $
4
5	1			1		27174	use strict;
	1					2
	1					35
6	1			1		5	use warnings;
	1					2
	1					35
7
8	1			1		6	use vars qw(@ISA @EXPORT $VERSION %Filters);
	1					13
	1					134
9
10							$VERSION = 1.020;
11							@ISA = qw(Exporter);
12							@EXPORT = qw(hashToArray arrayToHash filterData);
13
14	1			1		5	use constant OP_OR => 1;
	1					2
	1					84
15	1			1		6	use constant OP_AND => 2;
	1					2
	1					46
16	1			1		6	use constant OP_NOT => 3;
	1					2
	1					231
17
18							BEGIN
19							{
20	1			1		1657	%Filters = (
21							# equals
22							"eq" => \&_filterEqual,
23							"==" => \&_filterEqualInt,
24
25							# not equals
26							"ne" => \&_filterNotEqual,
27							"!=" => \&_filterNotEqualInt,
28
29							# regex
30							"re" => \&_filterRegex,
31							"=~" => \&_filterRegex,
32
33							# not-regex
34							"nre" => \&_filterNotRegex,
35							"!~" => \&_filterNotRegex,
36
37							# less than
38							"lt" => \&_filterLessThan,
39							"<" => \&_filterLessThanInt,
40
41							# less than, or equal to
42							"le" => \&_filterLessThanOrEqual,
43							"<=" => \&_filterLessThanOrEqualInt,
44
45							# greater than
46							"gt" => \&_filterGreaterThan,
47							">" => \&_filterGreaterThanInt,
48
49							# greater than, or equal to
50							"ge" => \&_filterGreaterThanOrEqual,
51							">=" => \&_filterGreaterThanOrEqualInt,
52
53							# "between" (inclusive)
54							"between" => \&_filterBetween,
55							);
56							}
57
58							sub filterData
59							{
60	26			26	1	26922	my ( $data, $filter ) = @_;
61
62	26	50				94	if ( ! UNIVERSAL::isa ( $data, 'HASH' ) )
63							{
64	0					0	$data = arrayToHash ( $data );
65							}
66
67	26					58	return _evalBranch ( $data, $filter );
68							}
69
70							sub hashToArray
71							{
72	26			26	1	34	my $hash = shift;
73	26					33	my @array = ();
74
75	26					87	foreach ( sort keys %$hash )
76							{
77	53					104	push @array, $hash->{ $_ };
78							}
79
80	26					74	return \@array;
81							}
82
83							sub arrayToHash
84							{
85	1			1	1	97	my $array = shift;
86
87	1	50				7	return unless UNIVERSAL::isa ( $array, 'ARRAY' );
88
89	1					2	my %data;
90	1					3	my $index = 0;
91
92	1					3	foreach ( @$array )
93							{
94	5					13	$data { $index++ } = $_;
95							}
96
97	1					5	return \%data;
98							}
99
100							sub _evalBranch
101							{
102	46			46		61	my ( $data, $filters ) = @_;
103
104	46					232	my %data = %$data;
105	46					128	my @filters = @$filters;
106
107	46					64	my $op = shift @filters;
108
109							# is this a filter?
110
111	46	100				114	if ( defined $Filters { $op } )
112							{
113							# yes
114	34					49	my $sub = $Filters { $op };
115
116							# apply filter to each of the elements of %data
117	34					94	foreach ( keys %data )
118							{
119	153	100				325	delete $data { $_ } unless &$sub ( $data { $_ }, \@filters );
120							}
121							}
122							else
123							{
124							# no!
125	12	100				39	if ( $op == OP_OR )
		100
		50
126							{
127							# run each op and merge the results
128	2					3	my %passed;
129	2					5	foreach ( @filters )
130							{
131							# these pass
132	4					10	_setMerge ( \%passed, _evalBranch ( \%data, $_ ) );
133							}
134	2					9	%data = %passed;
135							}
136							elsif ( $op == OP_NOT )
137							{
138	4					11	_setSubtract ( \%data, _evalBranch ( \%data, $filters [ 0 ] ) );
139							}
140							elsif ( $op == OP_AND )
141							{
142	6					12	foreach ( @filters )
143							{
144	12					24	%data = %{ _evalBranch ( \%data, $_ ) };
	12					27
145							}
146							}
147							else
148							{
149	0					0	die ( "Couldn't identify a filter, or operation" );
150							}
151							}
152
153	46					212	return \%data;
154							}
155
156							sub _setSubtract
157							{
158	4			4		7	my ( $dest, $source ) = @_;
159
160							# remove all of $source from $dest
161	4					8	foreach ( keys %$source )
162							{
163	10					21	delete $dest->{ $_ };
164							}
165							}
166
167							sub _setUnion
168							{
169	0			0		0	my ( $dest, $source ) = @_;
170
171							# union of the 2
172	0					0	foreach ( keys %$dest )
173							{
174	0	0				0	delete $dest->{ $_ } unless defined $source->{ $_ };
175							}
176							}
177
178							sub _setMerge
179							{
180	4			4		27	my ( $dest, $source ) = @_;
181
182							# merge source into destination making sure duplicates aren't merged in
183	4					10	foreach ( keys %$source )
184							{
185	6	100				34	$dest->{ $_ } = $source->{ $_ } unless defined $dest->{ $_ };
186							}
187							}
188
189							sub _filterEqual
190							{
191	33			33		81	my ( $data, $filters ) = @_;
192
193	33					150	return $data->{ $filters->[ 0 ] } eq $filters->[ 1 ];
194							}
195
196							sub _filterEqualInt
197							{
198	11			11		14	my ( $data, $filters ) = @_;
199
200	11					47	return $data->{ $filters->[ 0 ] } == $filters->[ 1 ];
201							}
202
203							sub _filterNotEqual
204							{
205	6			6		9	my ( $data, $filters ) = @_;
206
207	6					32	return $data->{ $filters->[ 0 ] } ne $filters->[ 1 ];
208							}
209
210							sub _filterNotEqualInt
211							{
212	5			5		8	my ( $data, $filters ) = @_;
213
214	5					21	return $data->{ $filters->[ 0 ] } != $filters->[ 1 ];
215							}
216
217							sub _filterRegex
218							{
219	10			10		15	my ( $data, $filters ) = @_;
220	10					64	return $data->{ $filters->[ 0 ] } =~ /$filters->[ 1 ]/;
221							}
222
223							sub _filterNotRegex
224							{
225	10			10		11	my ( $data, $filters ) = @_;
226	10					66	return $data->{ $filters->[ 0 ] } !~ /$filters->[ 1 ]/;
227							}
228
229							sub _filterLessThan
230							{
231	5			5		7	my ( $data, $filters ) = @_;
232	5					25	return $data->{ $filters->[ 0 ] } lt $filters->[ 1 ];
233							}
234
235							sub _filterLessThanInt
236							{
237	25			25		33	my ( $data, $filters ) = @_;
238	25					111	return $data->{ $filters->[ 0 ] } < $filters->[ 1 ];
239							}
240
241							sub _filterLessThanOrEqual
242							{
243	5			5		9	my ( $data, $filters ) = @_;
244	5					27	return $data->{ $filters->[ 0 ] } le $filters->[ 1 ];
245							}
246
247							sub _filterLessThanOrEqualInt
248							{
249	5			5		7	my ( $data, $filters ) = @_;
250	5					21	return $data->{ $filters->[ 0 ] } <= $filters->[ 1 ];
251							}
252
253							sub _filterGreaterThan
254							{
255	5			5		7	my ( $data, $filters ) = @_;
256	5					22	return $data->{ $filters->[ 0 ] } gt $filters->[ 1 ];
257							}
258
259							sub _filterGreaterThanInt
260							{
261	18			18		23	my ( $data, $filters ) = @_;
262	18					80	return $data->{ $filters->[ 0 ] } > $filters->[ 1 ];
263							}
264
265							sub _filterGreaterThanOrEqual
266							{
267	5			5		7	my ( $data, $filters ) = @_;
268
269	5					23	return $data->{ $filters->[ 0 ] } ge $filters->[ 1 ];
270							}
271
272							sub _filterGreaterThanOrEqualInt
273							{
274	5			5		7	my ( $data, $filters ) = @_;
275
276	5					61	return $data->{ $filters->[ 0 ] } >= $filters->[ 1 ];
277							}
278
279							sub _filterBetween
280							{
281	5			5		7	my ( $data, $filters ) = @_;
282
283	5					7	my $value = $data->{ $filters->[ 0 ] };
284
285	5		100			34	return ( $value >= $filters->[ 1 ] && $value <= $filters->[ 2 ] );
286							}
287
288							1;
289
290							=head1 NAME
291
292							Data::Filter - filter data structures with structured filters.
293
294							=head1 SYNOPSIS
295
296							use Data::Filter;
297
298							my %dataSet = (
299							0 => {
300							name => 'Data::Filter',
301							author => 'Matt Wilson',
302							},
303							1 => {
304							name => 'Pod::XML',
305							author => 'Matt Wilson,
306							},
307							# ... etc.
308							);
309
310							my @filter = [
311							Data::Filter::OP_AND,
312							[
313							're',
314							'name',
315							'^Pod',
316							],
317							[
318							're',
319							'name',
320							'XML$',
321							],
322							];
323
324							my %result = %{ filterData ( \%dataSet, \%filter ) };
325
326							=head1 DESCRIPTION
327
328							The structure of the data set is rarely in this format. However, I decided
329							that this was the easiest method to determine (and guarantee) that recursive
330							filters did not confuse the difference between records (as each record has
331							it's own unique key). If, as is more likely, your data set is in an array
332							format, like so;
333
334							my @dataSet = (
335							{
336							name => 'Data::Filter',
337							author => 'Matt Wilson',
338							},
339							{
340							name => 'Pod::XML',
341							author => 'Matt Wilson,
342							},
343							# ... etc.
344							);
345
346							A helper function is provided to convert your array into the required hash
347							reference form;
348
349							my %dataSet = %{ arrayToHash ( \@dataSet ) };
350
351							Where arrayToHash obviously returns a hash reference.
352
353							Similarly, the filterData subroutine returns a hash reference in the same form
354							as the provided data set (hash reference, rather than array). As such, there
355							is also a utility subroutine, hashToArray, to deal with such circumstances.
356
357							Next, let's take a look at the format of the filtering array, as that's fairly
358							important if you'd like to create any meaningful results!
359
360							A filter is of the form;
361
362							[
363							op,
364							column,
365							value,
366							( value2, value3, ... ),
367							]
368
369							or, more complex;
370
371							[
372							OP_AND,
373							[
374							(see above),
375							],
376							[
377							],
378							],
379
380							or, possibly;
381
382							[
383							OP_AND,
384							[
385							OP_NOT,
386							[
387							OP_EQ,
388							column,
389							value,
390							],
391							],
392							[
393							# ...
394							],
395							]
396
397							=head1 CREATING OPERATORS
398
399							It's possible to create your own operator functions (such as the "equals"
400							operator). To do this, simply add a new entry to the Data::Filter::Filters
401							hash, where the key is the name of the operator, and the value is a code
402							reference to the function to call. For instance, the "equals" operator looks
403							like so;
404
405							$Data::Filter::Filters { 'eq' } = \&_filterEqual;
406
407							The subroutine takes two parameters, a hash reference which represents the
408							entry being checked, and an array reference of the filter being executed. The
409							return value is whether or not the data hash reference passes this filter. For
410							example, the _filterEqual subroutine looks like so;
411
412							sub _filterEqual
413							{
414							my ( $data, $filters ) = @_;
415
416							return $data->{ $filters->[ 0 ] } eq $filters->[ 1 ];
417							}
418
419							Where the $filters array reference contains the elements [ column, value ].
420
421							=head1 METHODS
422
423							=over 2
424
425							=item \%filteredData = filterData(\%dataSet,\@filter)
426
427							Perform the actual filtering work using the filter described by @filter on the
428							hash %dataSet. More information can be found in the description section of
429							this POD.
430
431							=item \@data = hashToArray(\%data)
432
433							Convert a internal data representation along the lines of;
434
435							%data = (
436							0 => {
437							# column => value pairs
438							},
439							1 => {
440							# column => value pairs
441							},
442							)
443
444							To an array equivalent;
445
446							@data = (
447							{
448							# column => value pairs
449							},
450							{
451							# column => value pairs
452							},
453							)
454
455							=item \%data = arrayToHash(\@data)
456
457							This subroutine has the opposite effect of the hashToArray subroutine
458							described above.
459
460							=back
461
462							=head1 AUTHOR
463
464							Matt Wilson Ematt AT mattsscripts DOT co DOT ukE
465
466							=head1 LICENSE
467
468							This is free software, you may use it a distribute it under the same terms as
469							Perl itself.
470
471							=cut