File Coverage

blib/lib/Text/Quantize.pm
Criterion Covered Total %
statement 77 77 100.0
branch 36 40 90.0
condition 4 5 80.0
subroutine 7 7 100.0
pod 2 2 100.0
total 126 131 96.1


line stmt bran cond sub pod time code
1             package Text::Quantize;
2             # ABSTRACT: render a list of numbers as a textual chart
3 6     6   133479 use strict;
  6         13  
  6         213  
4 6     6   27 use warnings;
  6         11  
  6         165  
5 6     6   38 use List::Util 'sum';
  6         17  
  6         871  
6 6         66 use Sub::Exporter -setup => {
7             exports => ['quantize', 'bucketize'],
8             groups => {
9             default => ['quantize'],
10             },
11 6     6   17501 };
  6         90164  
12              
13             sub bucketize {
14 25     25 1 8154 my $elements = shift;
15 25 50       140 my $options = {
16             add_endpoints => 1,
17 25         34 %{ shift(@_) || {} },
18             };
19              
20 25         41 my %buckets;
21 25         45 for my $element (@$elements) {
22 359         311 my $bucket;
23              
24 359 100       711 if ($element == 0) {
    100          
25 8         9 $bucket = 0;
26             }
27             elsif ($element < 0) {
28             # log(negative) is an error, so take the log of the absolute value then negate it
29 123         224 $bucket = -1 * (2 ** int(log(-$element) / log(2)));
30             }
31             else {
32             # which power of 2 is this greater-than-or-equal to?
33 228         445 $bucket = 2 ** int(log($element) / log(2));
34             }
35              
36 359         1460 $buckets{$bucket}++;
37             }
38              
39             # allow user to specify only one of the two endpoints if desired, and figure the other out
40 25 100       84 if ($options->{add_endpoints}) {
41 6 100 66     37 unless (defined($options->{minimum}) && defined($options->{maximum})) {
42 5         24 my ($min, $max) = _endpoints_for(\%buckets);
43 5 50       33 $options->{minimum} = $min if !defined($options->{minimum});
44 5 100       51 $options->{maximum} = $max if !defined($options->{maximum});
45             }
46             }
47              
48             # if add_endpoints, then use the expanded range that we calculated or the user specified
49             # otherwise, start at the first bucket with data and go until the last bucket with data
50 88         113 my ($start, $end) = $options->{add_endpoints}
51             ? ($options->{minimum}, $options->{maximum})
52 25 100       109 : (sort { $a <=> $b } keys %buckets)[0,-1];
53              
54             # force every bucket in the range to exist
55 25         56 my $i = $start;
56 25         68 while ($i <= $end) {
57 141   100     364 $buckets{$i} ||= 0;
58 141 100       350 if ($i == 0) {
    100          
    100          
59 9         17 $i = 1;
60             }
61             elsif ($i == -1) {
62 5         11 $i = 0;
63             }
64             elsif ($i < 0) {
65 29         57 $i /= 2; # since we're negative, increasing means smaller numbers
66             }
67             else {
68 98         204 $i *= 2;
69             }
70             }
71 25         89 return \%buckets;
72             }
73              
74             sub quantize {
75 6     6 1 1968 my $elements = shift;
76 6 100       130 my %options = (
77             distribution_width => 40,
78             distribution_character => '@',
79             left_label => 'value',
80             middle_label => 'Distribution',
81             right_label => 'count',
82 6         19 %{ shift(@_) || {} },
83             );
84              
85 6         35 my $buckets = bucketize($elements, \%options);
86              
87             # pull these out because we consult them a lot, and in loops
88 6         15 my $distribution_width = $options{distribution_width};
89 6         13 my $distribution_character = $options{distribution_character};
90              
91             # the divisor deciding how wide each bucket's bar will be
92 6         71 my $sum = sum values %$buckets;
93              
94             # how wide must the first column (with the left_label and values) be?
95 6         16 my $left_width = length($options{left_label});
96 6         24 for my $bucket (keys %$buckets) {
97 69 50       145 $left_width = length($bucket)
98             if length($bucket) > $left_width;
99             }
100             # add that extra space before every row
101 6         16 $left_width++;
102              
103             # how many - characters do we need?
104 6         16 my $middle_spacer = $distribution_width - length($options{middle_label}) - 2;
105              
106             # these will be different when $middle_spacer is odd, but we
107             # always want them to sum to $middle_spacer.
108 6         15 my $middle_left = int($middle_spacer / 2);
109 6         10 my $middle_right = $middle_spacer - $middle_left;
110              
111 6         209 my @output = sprintf '%*s %s %s %s %s',
112             $left_width,
113             $options{left_label},
114             ('-' x $middle_left),
115             $options{middle_label},
116             ('-' x $middle_right),
117             $options{right_label};
118              
119 6         34 for my $bucket (sort { $a <=> $b } keys %$buckets) {
  182         211  
120 69         103 my $count = $buckets->{$bucket};
121 69         87 my $ratio = ($count / $sum);
122 69         86 my $width = $distribution_width * $ratio;
123              
124 69         287 push @output, sprintf '%*d |%-*s %d',
125             $left_width,
126             $bucket,
127             $distribution_width,
128             ($distribution_character x $width),
129             $count;
130             }
131              
132 6 50       112 return wantarray ? @output : (join "\n", @output)."\n";
133             }
134              
135             # given a set of buckets, find the power of two smaller than the
136             # smallest element, and the power of two greater than or equal to the
137             # largest element. used for add_endpoints
138             sub _endpoints_for {
139 21     21   61 my $buckets = shift;
140 21         41 my ($min_endpoint, $max_endpoint);
141              
142 21         80 my @sorted_buckets = (sort { $a <=> $b } keys %$buckets);
  144         216  
143 21         56 my ($min, $max) = @sorted_buckets[0, -1];
144              
145 21 100       69 if ($min == 0) {
    100          
    100          
146 4         4 $min_endpoint = -1;
147             }
148             elsif ($min == 1) {
149 1         2 $min_endpoint = 0;
150             }
151             elsif ($min < 0) {
152 6         24 $min_endpoint = -1 * (2 ** (int(log(-$min) / log(2)) + 1));
153             }
154             else {
155 10         30 $min_endpoint = 2 ** (int(log($min) / log(2)) - 1);
156             }
157              
158 21 100       70 if ($max == 0) {
    100          
    100          
159 1         2 $max_endpoint = 1;
160             }
161             elsif ($max == -1) {
162 2         3 $max_endpoint = 0;
163             }
164             elsif ($max < 0) {
165 2         9 $max_endpoint = -1 * (2 ** (int(log(-$max) / log(2)) - 1));
166             }
167             else {
168 16         34 $max_endpoint = 2 ** (int(log($max) / log(2)) + 1);
169             }
170              
171 21         84 return ($min_endpoint, $max_endpoint);
172             }
173              
174             1;
175              
176              
177              
178             =pod
179              
180             =head1 NAME
181              
182             Text::Quantize - render a list of numbers as a textual chart
183              
184             =head1 VERSION
185              
186             version 0.05
187              
188             =head1 SYNOPSIS
189              
190             use Text::Quantize;
191            
192             print quantize([26, 24, 51, 77, 21]);
193            
194             __END__
195            
196             value ------------- Distribution ------------- count
197             8 | 0
198             16 |@@@@@@@@@@@@@@@@@@@@@@@@ 3
199             32 |@@@@@@@@ 1
200             64 |@@@@@@@@ 1
201             128 | 0
202              
203             =head1 FUSSY SYNOPSIS
204              
205             use Text::Quantize ();
206              
207             print Text::Quantize::quantize([map { chomp; $_ } ], {
208             left_label => 'microseconds',
209             middle_label => 'Calls per time bucket',
210             right_label => 'syscalls',
211             distribution_width => 80,
212             distribution_character => '=',
213             });
214            
215             __END__
216            
217             microseconds ---------------------------- Calls per time bucket ----------------------------- syscalls
218             256 | 0
219             512 |==== 5
220             1024 |===== 7
221             2048 |================== 23
222             4096 |============================ 36
223             8192 |======= 9
224             16384 |= 2
225             32768 | 1
226             262144 | 1
227             524288 | 1
228             1048576 | 1
229             2097152 |======= 9
230             4194304 |=== 4
231             8388608 | 1
232             16777216 | 0
233              
234             =head1 FUNCTIONS
235              
236             =head2 C
237              
238             C takes an array reference of integers and an optional
239             hash reference of options, and produces a textual histogram of the
240             integers bucketed into powers-of-2 sets.
241              
242             Options include:
243              
244             =over 4
245              
246             =item C (default: C)
247              
248             Controls the text of the left-most label which represents the
249             bucket's contents.
250              
251             =item C (default: C)
252              
253             Controls the text of the middle label which can be used to title
254             the histogram.
255              
256             =item C (default: C)
257              
258             Controls the text of the right-most label which represents how many
259             items are in that bucket.
260              
261             =item C (default: C<40>)
262              
263             Controls how many characters wide the textual histogram is. This
264             does not include the legends.
265              
266             =item C (default: C<@>)
267              
268             Controls the character used to represent the data in the histogram.
269              
270             =item C (default: C<1>)
271              
272             Controls whether the top and bottom lines (which are going to have
273             values of 0) are added. They're included by default because it hints
274             that the data set is complete.
275              
276             =back
277              
278             =head2 C
279              
280             C takes an array reference of integers and an optional
281             hash reference of options, and produces a hash reference of those
282             integers bucketed into powers-of-2 sets.
283              
284             Options include:
285              
286             =over 4
287              
288             =item add_endpoints (default: C<1>)
289              
290             Controls whether extra buckets, smaller than the minimum value and
291             larger than the maximum value, (which are going to have values of
292             0) are added. They're included by default because it hints that the
293             data set is complete.
294              
295             =back
296              
297             =head1 SEE ALSO
298              
299             C, which is where I first saw this kind of C
300             histogram.
301              
302             L, which ported C to Perl first, and from which I
303             took a few insights.
304              
305             =head1 AUTHOR
306              
307             Shawn M Moore
308              
309             =head1 COPYRIGHT AND LICENSE
310              
311             This software is copyright (c) 2012 by Infinity Interactive.
312              
313             This is free software; you can redistribute it and/or modify it under
314             the same terms as the Perl 5 programming language system itself.
315              
316             =cut
317              
318              
319             __END__