File Coverage

blib/lib/Normalize.pm

Criterion	Covered	Total	%
statement	94	96	97.9
branch	24	28	85.7
condition	18	31	58.0
subroutine	13	13	100.0
pod	5	5	100.0
total	154	173	89.0

line	stmt	bran	cond	sub	pod	time	code
1							package Normalize;
2
3	5			5		100118	use warnings;
	5					11
	5					164
4	5			5		30	use strict;
	5					11
	5					166
5	5			5		3805	use Math::Round::Var;
	5					5023
	5					5320
6
7							=head1 NAME
8
9							Normalize - normalize scores between 0 and 1.
10
11							=head1 VERSION
12
13							Version 0.31
14
15							=cut
16
17							our $VERSION = '0.31';
18
19							=head1 SYNOPSIS
20
21							use Normalize;
22
23							my %iq_rate = ('Professor' => 125.12, 'Bender' => 64, 'Dr. Zoidberg' => 28.6, 'Fray' => 13);
24							my %weight_rate = ('Professor' => 70.2, 'Bender' => 600, 'Dr. Zoidberg' => 200, 'Fray' => 120);
25							my $norm = Normalize->new('round_to' => 0.001);
26
27							#larger score is better:
28							$norm->normalize_to_max(\%iq_rate);
29							print "\n#iq rate: larger iq is better:\n";
30							foreach my $key (keys %iq_rate)
31							{
32							print "$key = $iq_rate{$key}\n";
33							}
34
35							#iq rate: larger iq is better:
36							#1.000 Professor
37							#0.512 Bender
38							#0.229 Dr. Zoidberg
39							#0.104 Fray
40
41							#smaller score is better
42							$norm->normalize_to_min(\%weight_rate, {min_default => 0.001});
43							print "\n#skinny rate: smaller weight is better:\n";
44							foreach my $key (sort {$weight_rate{$b} <=> $weight_rate{$a}} keys %weight_rate)
45							{
46							print "#$weight_rate{$key}\t$key\n";
47							}
48							##skinny rate: smaller weight is better:
49							#1.000 Professor
50							#0.585 Fray
51							#0.351 Dr. Zoidberg
52							#0.117 Bender
53
54							#SUMMARY RATE
55							my %summary_score = map { $_ => $weight_rate{$_} + $iq_rate{$_} } keys %iq_rate;
56							$norm->normalize_to_max( \%summary_score );
57							print "\n#summary score:\n";
58							foreach my $key (sort {$summary_score{$b} <=> $summary_score{$a}} keys %summary_score)
59							{
60							print "#$summary_score{$key}\t$key\n";
61							}
62							#summary score:
63							#1.000 Professor
64							#0.344 Fray
65							#0.315 Bender
66							#0.290 Dr. Zoidberg
67
68							#Dr. Zoidberg - looser lobster! Quod erat demonstrandum
69
70
71							=head1 DESCRIPTION
72
73							This module gives you the ability to normalize score result sets.
74							Sometimes a larger score is better and sometimes a smaller score is better.
75							In order to compare the results from different methods? You need a way to
76							normalize them: that is, to get them all within the same range and direction.
77
78							The normalization functions will take a hash ref {key => score} or array ref [score 1, score 2, ...scaore 3] and return the same ref, but whith scores between 0 and 1.
79							Each score is scaled according to how close it to the best result, wich will always have a score of 1.
80
81							=head1 METHODS
82
83							=head2 new(%opts)
84
85							Normalize->new(%opts) - constructor
86
87							=head3 %opts
88
89							round_to - default value 0.01. Rounding precision. For more info see L
90
91							min_default - by default eq round_to value. Need for prevent delete on zero in normalize_to_min()
92
93
94							=cut
95
96							sub new {
97	3			3	1	37	my $caller = shift;
98	3		33			26	my $class = ref($caller) \|\| $caller;
99	3					10	my $self = {};
100	3					11	bless( $self, $class );
101	3					17	$self->_init(@_);
102	3					12	return $self;
103							}
104
105							sub _init {
106	3			3		8	my $self = shift;
107	3					17	$self->set(@_);
108
109							#set default precision
110	3					14	my $round_to = $self->get('round_to');
111	3	100				17	unless ($round_to) {
112	1					2	$round_to = 0.01;
113	1					4	$self->set( round_to => $round_to );
114							}
115	3					36	$self->set( 'round_obj' => Math::Round::Var->new($round_to) );
116	3					9	return $self;
117							}
118
119							=head2 set(%params)
120
121							set object params
122
123							=cut
124
125							sub set {
126	8			8	1	254	my $self = shift;
127	8					28	my %op = @_;
128	8					29	foreach my $k ( keys %op ) {
129	7					42	$self->{$k} = $op{$k};
130							}
131
132	8					27	return $self;
133							}
134
135							=head2 get(param_name)
136
137							get object param
138
139							=cut
140
141							sub get {
142	24			24	1	976	my $self = shift;
143	24					41	my $key = shift;
144	24					113	return $self->{$key};
145							}
146
147							=head2 normalize_to_min($score_set_data, %opts)
148
149							Each score is scaled according to how close it to the smaller result, wich will always have a score of 1.
150							$score_set_data - hashref {key1 => score1, key2 => score2,..} or arrayref [score1, score2, ...]
151							options:
152
153							%opts = (
154							min_default => 0.01#by default = round_to value. Need for prevent delete on zero in normalize_to_min()
155							)
156
157							return same data structure (hashref or arrayref)
158
159							=cut
160
161							sub normalize_to_min {
162	5			5	1	3940	my $self = shift;
163	5					11	my ($data) = @_;
164	5	100				26	if ( ref($data) eq 'HASH' ) {
		50
165	3					14	return $self->_hash_small_is_better(@_);
166							}
167							elsif ( ref($data) eq 'ARRAY' ) {
168	2					8	return $self->_array_small_is_better(@_);
169							}
170	0					0	return undef;
171							}
172
173							=head2 normalize_to_max($score_set_data)
174
175							Each score is scaled according to how close it to the larger result, wich will always have a score of 1.
176							$score_set_data - hashref {key1 => score1, key2 => score2,..} or arrayref [score1, score2, ...]
177
178							return same data structure (hashref or arrayref)
179
180							=cut
181
182							sub normalize_to_max {
183	3			3	1	3105	my $self = shift;
184	3					6	my ($data) = @_;
185	3	100				20	if ( ref($data) eq 'HASH' ) {
		50
186	2					11	return $self->_hash_max_is_better(@_);
187							}
188							elsif ( ref($data) eq 'ARRAY' ) {
189	1					6	return $self->_array_max_is_better(@_);
190							}
191	0					0	return undef;
192							}
193
194							sub _hash_small_is_better {
195	3			3		7	my $self = shift;
196	3					6	my $data = shift;
197	3		50			15	my $opt = shift \|\| {};
198
199	3					5	my $min = undef;
200	3		33			21	my $min_default = $opt->{min}
201							\|\| $self->get('min_default')
202							\|\| $self->get('round_to');
203	3					13	my $rnd = $self->get('round_obj');
204	3					16	foreach my $d ( keys %$data ) {
205	12	100				33	unless ( defined $min ) {
206	3					5	$min = $data->{$d};
207	3					8	next;
208							}
209	9	100				32	$min = $data->{$d} if ( $data->{$d} < $min );
210	9	50	66			118	$min_default = $data->{$d}
211							if ( $data->{$d} && $data->{$d} < $min_default );
212
213							}
214	3		66			14	$min \|\|= $min_default;
215
216	3					12	foreach my $d ( keys %$data ) {
217	12		66			770	$data->{$d} = $rnd->round( $min / ( $data->{$d} \|\| $min_default ) );
218							}
219	3					44	return $data;
220							}
221
222							sub _array_small_is_better {
223	2			2		11	my $self = shift;
224	2					2	my $data = shift;
225	2		100			17	my $opt = shift \|\| {};
226
227	2		33			10	my $min_default = $opt->{min}
228							\|\| $self->get('min_default')
229							\|\| $self->get('round_to');
230	2					5	my $rnd = $self->get('round_obj');
231	2					4	my $min = $data->[0];
232	2					25	foreach my $d (@$data) {
233	7	100				18	$min = $d if ( $d < $min );
234	7	50	66			33	$min_default = $d if ( $d && $d < $min_default );
235							}
236	2		66			13	$min \|\|= $min_default;
237
238	2					7	foreach my $i ( 0 .. $#$data ) {
239	7		66			94	$data->[$i] = $rnd->round( $min / ( $data->[$i] \|\| $min_default ) );
240							}
241	2					27	return $data;
242							}
243
244							sub _hash_max_is_better {
245	2			2		5	my $self = shift;
246	2					4	my $data = shift;
247
248	2					4	my $max = undef;
249	2					9	my $rnd = $self->get('round_obj');
250	2					10	foreach my $d ( keys %$data ) {
251	8	100				21	unless ($max) {
252	2					5	$max = $data->{$d};
253	2					6	next;
254							}
255	6	100				27	$max = $data->{$d} if ( $data->{$d} > $max );
256
257							}
258
259	2					8	foreach my $d ( keys %$data ) {
260	8					107	$data->{$d} = $rnd->round( $data->{$d} / $max );
261							}
262	2					28	return $data;
263
264							}
265
266							sub _array_max_is_better {
267	1			1		2	my $self = shift;
268	1					4	my $data = shift;
269
270	1					3	my $max = undef;
271	1					5	my $rnd = $self->get('round_obj');
272	1					3	foreach my $d (@$data) {
273	3	100				8	unless ($max) {
274	1					4	$max = $d;
275	1					3	next;
276							}
277	2	100				10	$max = $d if ( $d > $max );
278
279							}
280
281	1					4	foreach my $i ( 0 .. $#$data ) {
282	3					63	$data->[$i] = $rnd->round( $data->[$i] / $max );
283							}
284
285	1					16	return $data;
286
287							}
288
289							=head1 SEE ALSO
290
291							L - Variations on rounding.
292
293							Idea for this module and normalization Algoritm from book "Programming Collective Intelligence: Building Smart Web 2.0 Applications By Toby Segaran)" L
294
295							=head1 AUTHOR
296
297							Konstantin Kapitanov aka Green Kakadu, C
298
299							L
300
301							=head1 BUGS
302
303							Please report any bugs or feature requests to C, or through
304							the web interface at L. I will be notified, and then you'll
305							automatically be notified of progress on your bug as I make changes.
306
307
308							=head1 SUPPORT
309
310							You can find documentation for this module with the perldoc command.
311
312							perldoc Normalize
313
314
315							You can also look for information at:
316
317							=over 4
318
319							=item * RT: CPAN's request tracker
320
321							L
322
323							=item * AnnoCPAN: Annotated CPAN documentation
324
325							L
326
327							=item * CPAN Ratings
328
329							L
330
331							=item * Search CPAN
332
333							L
334
335							=back
336
337
338							=head1 ACKNOWLEDGEMENTS
339
340
341							=head1 COPYRIGHT & LICENSE
342
343							Copyright 2009 Konstantin Kapitanov, all rights reserved.
344
345							This program is free software; you can redistribute it and/or modify it
346							under the same terms as Perl itself.
347
348
349							=cut
350
351							1; # End of Normalize