File Coverage

blib/lib/Statistics/Descriptive/LogScale.pm
Criterion Covered Total %
statement 345 346 99.7
branch 127 148 85.8
condition 48 65 73.8
subroutine 54 54 100.0
pod 27 27 100.0
total 601 640 93.9


line stmt bran cond sub pod time code
1 22     22   533900 use 5.006;
  22         85  
2 22     22   121 use strict;
  22         39  
  22         496  
3 22     22   106 use warnings;
  22         45  
  22         1893  
4              
5             package Statistics::Descriptive::LogScale;
6              
7             =head1 NAME
8              
9             Statistics::Descriptive::LogScale - Memory-efficient approximate univariate
10             descriptive statistics class.
11              
12             =head1 VERSION
13              
14             Version 0.09
15              
16             =cut
17              
18             our $VERSION = 0.09;
19              
20             =head1 SYNOPSIS
21              
22             =head2 Basic usage
23              
24             The basic usage is roughly the same as that of L.
25              
26             use Statistics::Descriptive::LogScale;
27             my $stat = Statistics::Descriptive::LogScale->new ();
28              
29             while(<>) {
30             chomp;
31             $stat->add_data($_);
32             };
33              
34             # This can also be done in O(1) memory, precisely
35             printf "Mean: %f +- %f\n", $stat->mean, $stat->standard_deviation;
36             # This requires storing actual data, or approximating
37             printf "25%% : %f\n", $stat->percentile(25);
38             printf "Median: %f\n", $stat->median;
39             printf "75%% : %f\n", $stat->percentile(75);
40              
41             =head2 Save/load
42              
43             This is not present in L.
44             The save/load interface is designed compatible with JSON::XS.
45             However, any other serializer can be used.
46             The C method is I to return unblessed hashref
47             with enough information to restore the original object.
48              
49             use Statistics::Descriptive::LogScale;
50             my $stat = Statistics::Descriptive::LogScale->new ();
51              
52             # ..... much later
53             # Save
54             print $fd encoder_of_choice( $stat->TO_JSON )
55             or die "Failed to save: $!";
56              
57             # ..... and even later
58             # Load
59             my $plain_hash = decoder_of_choice( $raw_data );
60             my $copy_of_stat = Statistics::Descriptive::LogScale->new( %$plain_hash );
61              
62             # Import into existing LogScale instance
63             my $plain_hash = decoder_of_choice( $more_raw_data );
64             $copy_of_stat->add_data_hash( $plain_hash->{data} );
65              
66             =head2 Histograms
67              
68             Both L and L
69             offer C method for querying data point counts.
70             However, there's also C method for making pretty pictures.
71             Here's a simple text-based histogram.
72             A proper GD example was too long to fit into this margin.
73              
74             use strict;
75             use warnings;
76              
77             use Statistics::Descriptive::LogScale;
78             my $stat = Statistics::Descriptive::LogScale->new ();
79              
80             # collect/load data ...
81             my $re_float = qr([-+]?(?:\d+\.?\d*|\.\d+)(?:[Ee][-+]?\d+)?);
82             while (<>) {
83             $stat->add_data($_) for m/($re_float)/g;
84             };
85             die "Empty set"
86             unless $stat->count;
87              
88             # get data in [ count, lower_bound, upper_bound ] format as arrayref
89             my $hist = $stat->histogram( count => 20 );
90              
91             # find maximum value to use as a scale factor
92             my $scale = $hist->[0][0];
93             $scale < $_->[0] and $scale = $_->[0] for @$hist;
94              
95             foreach (@$hist) {
96             printf "%10f %s\n", $_->[1], '#' x ($_->[0] * 68 / $scale);
97             };
98             printf "%10f\n", $hist->[-1][2];
99              
100             =head1 DESCRIPTION
101              
102             This module aims at providing some advanced statistical functions without
103             storing all data in memory, at the cost of certain (predictable) precision loss.
104              
105             Data is represented by a set of bins that only store counts of fitting
106             data points.
107             Most bins are logarithmic, i.e. lower end / upper end ratio is constant.
108             However, around zero linear approximation may be user instead
109             (see "linear_width" and "linear_thresh" parameters in new()).
110              
111             All operations are then performed on the bins, introducing relative error
112             which does not, however, exceed the bins' relative width ("base").
113              
114             =head1 METHODS
115              
116             =cut
117              
118             ########################################################################
119             # == HOW IT WORKS ==
120             # Buckets are stored in a hash: { $value => $count, ... }
121             # {base} is bin width, {logbase} == log {base} (cache)
122             # {linear_thresh} is where we switch to equal bin approximation
123             # {linear_width} is width of bin around zero (==linear_thresh if not given)
124             # {floor} is lower bound of bin whose center is 1. {logfloor} = log {floor}
125             # Nearly all meaningful subs have to scan all the bins, which is bad,
126             # but anyway better than scanning full sample.
127              
128 22     22   120 use Carp;
  22         40  
  22         1794  
129 22     22   17822 use POSIX qw(floor ceil);
  22         175957  
  22         122  
130              
131             # Fields are NOT used internally for now, so this is just a declaration
132 22         128 use fields qw(
133             data count
134             linear_width base logbase floor linear_thresh logfloor
135             cache
136 22     22   3422996 );
  22         35081  
137              
138             # This is for infinite portability^W^W portable infinity
139             my $INF = 9**9**9;
140              
141             =head2 new( %options )
142              
143             %options may include:
144              
145             =over
146              
147             =item * base - ratio of adjacent bins. Default is 10^(1/232), which gives
148             1% precision and exact decimal powers.
149             This value represents acceptable relative error in analysis results.
150              
151             B Actual value may be slightly less than requested one.
152             This is done so to avoid troubles with future rounding in (de)serialization.
153              
154             =item * linear_width - width of linear bins around zero.
155             This value represents precision of incoming data.
156             Default is zero, i.e. we assume that the measurement is precise.
157              
158             B Actual value may be less (by no more than a factor of C)
159             so that borders of linear and logarithmic bins fit nicely.
160              
161             =item * linear_thresh - where to switch to linear approximation.
162             If only one of C and C is given,
163             the other will be calculated.
164             However, user may want to specify both in some cases.
165              
166             B Actual value may be less (by no more than a factor of C)
167             so that borders of linear and logarithmic bins fit nicely.
168              
169             =item * data - hashref with C<{ value => weight }> for initializing data.
170             Used for cloning.
171             See C.
172              
173             =item * zero_thresh - absolute value threshold below which everything is
174             considered zero.
175             DEPRECATED, C and C override this if given.
176              
177             =back
178              
179             =cut
180              
181             my @new_keys = qw( base linear_thresh linear_width zero_thresh data );
182             # TODO Throw if extra options given?
183             sub new {
184 41     41 1 8290 my $class = shift;
185 41         149 my %opt = @_;
186              
187             # base for logarithmic bins, sane default: +-1%, exact decimal powers
188             # UGLY HACK number->string->number to avoid
189             # future serialization inconsistencies
190 41   100     194 $opt{base} ||= 10**(1/232);
191 41         403 $opt{base} = 0 + "$opt{base}";
192 41 50       221 $opt{base} > 1 or croak __PACKAGE__.": new(): base must be >1";
193              
194             # calculate where to switch to linear approximation
195             # the condition is: linear bin( thresh ) ~~ log bin( thresh )
196             # i.e. thresh * base - thresh ~~ absolute error * 2
197             # i.e. thresh ~~ absolute_error * 2 / (base - 1)
198             # also support legacy API (zero_thresh)
199 41 100       143 if (defined $opt{linear_thresh} ) {
200 13   100     78 $opt{linear_width} ||= $opt{linear_thresh} * ($opt{base}-1);
201             } else {
202 28         424 $opt{linear_thresh} = $opt{zero_thresh};
203             };
204             $opt{linear_thresh} = abs($opt{linear_width}) / ($opt{base} - 1)
205 41 100 100     222 if $opt{linear_width} and !$opt{linear_thresh};
206 41   100     181 $opt{linear_thresh} ||= 0;
207 41 50       137 $opt{linear_thresh} >= 0
208             or croak __PACKAGE__.": new(): linear_thresh must be >= 0";
209              
210             # Can't use fields::new anymore
211             # due to JSON::XS incompatibility with restricted hashes
212 41         130 my $self = bless {}, $class;
213              
214 41         171 $self->{base} = $opt{base};
215             # cache values to ease calculations
216             # floor = (lower end of bin) / (center of bin)
217 41         125 $self->{floor} = 2/(1+$opt{base});
218 41         263 $self->{logbase} = log $opt{base};
219 41         132 $self->{logfloor} = log $self->{floor};
220              
221             # bootstrap linear_thresh - make it fit bin edge
222 41         91 $self->{linear_width} = $self->{linear_thresh} = 0;
223 41         158 $self->{linear_thresh} = $self->_lower( $opt{linear_thresh} );
224              
225             # divide anything below linear_thresh into odd number of bins
226             # not exceeding requested linear_width
227 41 100       139 if ($self->{linear_thresh}) {
228 15   66     57 my $linear_width = $opt{linear_width} || 2 * $self->{linear_thresh};
229 15         62 my $n_linear = ceil(2 * $self->{linear_thresh} / abs($linear_width));
230 15 100       53 $n_linear++ unless $n_linear % 2;
231 15         41 $self->{linear_width} = (2 * $self->{linear_thresh} / $n_linear);
232             };
233              
234 41         142 $self->clear;
235 41 100       133 if ($opt{data}) {
236 10         31 $self->add_data_hash($opt{data});
237             };
238 41         181 return $self;
239             };
240              
241             =head1 General statistical methods
242              
243             These methods are used to query the distribution properties. They generally
244             follow the interface of L and co,
245             with minor additions.
246              
247             All methods return C on empty data set, except for
248             C, C, C, C and C
249             which all return 0.
250              
251             B This module caches whatever it calculates very agressively.
252             Don't hesitate to use statistical functions (except for sum_of/mean_of)
253             more than once. The cache is deleted upon data entry.
254              
255             =head2 clear
256              
257             Destroy all stored data.
258              
259             =cut
260              
261             sub clear {
262 55     55 1 10259 my $self = shift;
263 55         120 $self->{data} = {};
264 55         3445 $self->{count} = 0;
265 55         118 delete $self->{cache};
266 55         114 return $self;
267             };
268              
269             =head2 add_data( @data )
270              
271             Add numbers to the data pool.
272              
273             Returns self, so that methods can be chained.
274              
275             If incorrect data is given (i.e. non-numeric, undef),
276             an exception is thrown and only partial data gets inserted.
277             The state of object is guaranteed to remain consistent in such case.
278              
279             B Cache is reset, even if no data was actually inserted.
280              
281             B It is possible to add infinite values to data pool.
282             The module will try and calculate whatever can still be calculated.
283             However, no portable way of serializing such values is done yet.
284              
285             =cut
286              
287             sub add_data {
288 51212     51212 1 208194 my $self = shift;
289              
290 51212         58987 delete $self->{cache};
291 51212         81457 foreach (@_) {
292 90136         174826 $self->{data}{ $self->_round($_) }++;
293 90133         164623 $self->{count}++;
294             };
295 51209         97184 $self;
296             };
297              
298             =head2 count
299              
300             Returns number of data points.
301              
302             =cut
303              
304             sub count {
305 69     69 1 10577 my $self = shift;
306 69         314 return $self->{count};
307             };
308              
309             =head2 min
310              
311             =head2 max
312              
313             Values of minimal and maximal bins.
314              
315             NOTE: Due to rounding, some of the actual inserted values may fall outside
316             of the min..max range. This may change in the future.
317              
318             =cut
319              
320             sub min {
321             my $self = shift;
322             return $self->_sort->[0];
323             };
324              
325             sub max {
326             my $self = shift;
327             return $self->_sort->[-1];
328             };
329              
330             =head2 sample_range
331              
332             Return sample range of the dataset, i.e. max() - min().
333              
334             =cut
335              
336             sub sample_range {
337 5     5 1 597 my $self = shift;
338 5 100       14 return $self->count ? $self->max - $self->min : undef;
339             };
340              
341             =head2 sum
342              
343             Return sum of all data points.
344              
345             =cut
346              
347             sub sum {
348             my $self = shift;
349             return $self->sum_of(sub { $_[0] });
350             };
351              
352             =head2 sumsq
353              
354             Return sum of squares of all datapoints.
355              
356             =cut
357              
358             sub sumsq {
359             my $self = shift;
360             return $self->sum_of(sub { $_[0] * $_[0] });
361             };
362              
363             =head2 mean
364              
365             Return mean, or average value, i.e. sum()/count().
366              
367             =cut
368              
369             sub mean {
370             my $self = shift;
371             return $self->{count} ? $self->sum / $self->{count} : undef;
372             };
373              
374             =head2 variance
375              
376             =head2 variance( $correction )
377              
378             Return data variance, i.e. E((x - E(x)) ** 2).
379              
380             Bessel's correction (division by n-1 instead of n) is used by default.
381             This may be changed by specifying $correction explicitly.
382              
383             B The binning strategy used here should also introduce variance bias.
384             This is not yet accounted for.
385              
386             =cut
387              
388             sub variance {
389             my $self = shift;
390             my $correction = shift;
391              
392             # in fact we'll receive correction='' because of how cache works
393             $correction = 1 unless defined $correction and length $correction;
394              
395             return 0 if ($self->{count} < 1 + $correction);
396              
397             my $var = $self->sumsq - $self->sum**2 / $self->{count};
398             return $var <= 0 ? 0 : $var / ( $self->{count} - $correction );
399             };
400              
401             =head2 standard_deviation
402              
403             =head2 standard_deviation( $correction )
404              
405             =head2 std_dev
406              
407             Return standard deviation, i.e. square root of variance.
408              
409             Bessel's correction (division by n-1 instead of n) is used by default.
410             This may be changed by specifying $correction explicitly.
411              
412             B The binning strategy used here should also introduce variance bias.
413             This is not yet accounted for.
414              
415             =cut
416              
417             sub standard_deviation {
418             my $self = shift;
419              
420             return sqrt($self->variance(@_));
421             };
422              
423             =head2 cdf ($x)
424              
425             Cumulative distribution function. Returns estimated probability of
426             random data point from the sample being less than C<$x>.
427              
428             As a special case, C accounts for I of zeroth bin count (if any).
429              
430             Not present in Statistics::Descriptive::Full, but appears in
431             L.
432              
433             =head2 cdf ($x, $y)
434              
435             Returns probability of a value being between C<$x> and C<$y> ($x <= $y).
436             This is essentially C.
437              
438             =cut
439              
440             sub cdf {
441 4     4 1 7 my $self = shift;
442 4 50       12 return unless $self->{count};
443 4         9 return $self->_count(@_) / $self->{count};
444             };
445              
446             =head2 percentile( $n )
447              
448             Find $n-th percentile, i.e. a value below which lies $n % of the data.
449              
450             0-th percentile is by definition -inf and is returned as undef
451             (see Statistics::Descriptive).
452              
453             $n is a real number, not necessarily integer.
454              
455             =cut
456              
457             sub percentile {
458 46     46 1 8715 my $self = shift;
459 46         77 my $x = shift;
460              
461             # assert 0<=$x<=100
462 46 50 33     294 croak __PACKAGE__.": percentile() argument must be between 0 and 100"
463             unless 0<= $x and $x <= 100;
464              
465 46         126 my $need = $x * $self->{count} / 100;
466 46 100       161 return if $need < 1;
467              
468             # dichotomize
469             # $i is lowest value >= needed
470             # $need doesnt exceed last bin!
471 41         124 my $i = _bin_search_ge( $self->_probability, $need );
472 41         113 return $self->_sort->[ $i ];
473             };
474              
475             =head2 quantile( 0..4 )
476              
477             From Statistics::Descriptive manual:
478              
479             0 => zero quartile (Q0) : minimal value
480             1 => first quartile (Q1) : lower quartile = lowest cut off (25%) of data = 25th percentile
481             2 => second quartile (Q2) : median = it cuts data set in half = 50th percentile
482             3 => third quartile (Q3) : upper quartile = highest cut off (25%) of data, or lowest 75% = 75th percentile
483             4 => fourth quartile (Q4) : maximal value
484              
485             =cut
486              
487             sub quantile {
488             my $self = shift;
489             my $t = shift;
490              
491             croak (__PACKAGE__.": quantile() argument must be one of 0..4")
492             unless $t =~ /^[0-4]$/;
493              
494             $t or return $self->min;
495             return $self->percentile($t * 100 / 4);
496             };
497              
498             =head2 median
499              
500             Return median of data, a value that divides the sample in half.
501             Same as percentile(50).
502              
503             =cut
504              
505             sub median {
506 16     16 1 993 my $self = shift;
507 16         52 return $self->percentile(50);
508             };
509              
510             =head2 trimmed_mean( $ltrim, [ $utrim ] )
511              
512             Return mean of sample with $ltrim and $utrim fraction of data points
513             remover from lower and upper ends respectively.
514              
515             ltrim defaults to 0, and rtrim to ltrim.
516              
517             =cut
518              
519             sub trimmed_mean {
520 1     1 1 3 my $self = shift;
521 1         2 my ($lower, $upper) = @_;
522 1   50     4 $lower ||= 0;
523 1 50       4 $upper = $lower unless defined $upper;
524              
525 1         5 my $min = $self->percentile($lower * 100);
526 1         5 my $max = $self->percentile(100 - $upper * 100);
527              
528 1 50       4 return unless $min < $max;
529              
530 1     5   7 return $self->mean_of(sub{$_[0]}, $min, $max);
  5         13  
531             };
532              
533             =head2 harmonic_mean
534              
535             Return harmonic mean of the data, i.e. 1/E(1/x).
536              
537             Return undef if division by zero occurs (see Statistics::Descriptive).
538              
539             =cut
540              
541             sub harmonic_mean {
542 2     2 1 614 my $self = shift;
543              
544 2         4 my $ret;
545 2         4 eval {
546 2     5   7 $ret = $self->count / $self->sum_of(sub { 1/$_[0] });
  5         14  
547             };
548 2 50 66     23 if ($@ and $@ !~ /division.*zero/) {
549 0         0 die $@; # rethrow ALL BUT 1/0 which yields undef
550             };
551 2         10 return $ret;
552             };
553              
554             =head2 geometric_mean
555              
556             Return geometric mean of the data, that is, exp(E(log x)).
557              
558             Dies unless all data points are of the same sign.
559              
560             =cut
561              
562             sub geometric_mean {
563 2     2 1 618 my $self = shift;
564              
565 2 100       8 return unless $self->count;
566 1 50       5 croak __PACKAGE__.": geometric_mean() called on mixed sign sample"
567             if $self->min * $self->max < 0;
568              
569 1 50       5 return 0 if $self->{data}{0};
570             # this must be dog slow, but we already log() too much at this point.
571 1     5   6 my $ret = exp( $self->sum_of( sub { log abs $_[0] } ) / $self->{count} );
  5         15  
572 1 50       5 return $self->min < 0 ? -$ret : $ret;
573             };
574              
575             =head2 skewness
576              
577             Return skewness of the distribution, calculated as
578             n/(n-1)(n-2) * E((x-E(x))**3)/std_dev**3 (this is consistent with Excel).
579              
580             =cut
581              
582             sub skewness {
583 20     20 1 1307 my $self = shift;
584              
585 20         40 my $n = $self->{count};
586 20 100       65 return unless $n > 2;
587              
588             # code stolen from Statistics::Descriptive
589 18         51 my $skew = $n * $self->std_moment(3);
590 18         42 my $correction = $n / ( ($n-1) * ($n-2) );
591 18         89 return $correction * $skew;
592             };
593              
594             =head2 kurtosis
595              
596             Return kurtosis of the distribution, that is 4-th standardized moment - 3.
597             The exact formula used here is consistent with that of Excel and
598             Statistics::Descriptive.
599              
600             =cut
601              
602             sub kurtosis {
603 22     22 1 639 my $self = shift;
604              
605 22         47 my $n = $self->{count};
606 22 100       70 return unless $n > 3;
607              
608             # code stolen from Statistics::Descriptive
609 20         50 my $kurt = $n * $self->std_moment(4);
610 20         65 my $correction1 = ( $n * ($n+1) ) / ( ($n-1) * ($n-2) * ($n-3) );
611 20         48 my $correction2 = ( 3 * ($n-1) ** 2) / ( ($n-2) * ($n-3) );
612              
613 20         115 return $correction1 * $kurt - $correction2;
614             };
615              
616             =head2 central_moment( $n )
617              
618             Return $n-th central moment, that is, E((x - E(x))^$n).
619              
620             Not present in Statistics::Descriptive::Full.
621              
622             =cut
623              
624             sub central_moment {
625             my $self = shift;
626             my $n = shift;
627              
628             my $mean = $self->mean;
629             return $self->sum_of(sub{ ($_[0] - $mean) ** $n }) / $self->{count};
630             };
631              
632             =head2 std_moment( $n )
633              
634             Return $n-th standardized moment, that is,
635             E((x - E(x))**$n) / std_dev(x)**$n.
636              
637             Not present in Statistics::Descriptive::Full.
638              
639             =cut
640              
641             sub std_moment {
642             my $self = shift;
643             my $n = shift;
644              
645             my $mean = $self->mean;
646             my $dev = $self->std_dev;
647             return $self->sum_of(sub{ ($_[0] - $mean) ** $n })
648             / ( $dev**$n * $self->{count} );
649             };
650              
651             =head2 mode
652              
653             Mode of a distribution is the most common value for a discrete distribution,
654             or maximum of probability density for continuous one.
655              
656             For now we assume that the distribution IS discrete, and return the bin with
657             the biggest hit count.
658              
659             NOTE A better algorithm is still wanted. Experimental.
660             Behavior may change in the future.
661              
662             =cut
663              
664             # Naive implementation
665             # Find bin w/greatest count and return it
666             sub mode {
667             my $self = shift;
668              
669             return if !$self->count;
670              
671             my $index = $self->_sort;
672             return $index->[0] if @$index == 1;
673              
674             my @count = map { $self->{data}{$_} } @$index;
675              
676             my $max_index;
677             my $max_growth = 0;
678             for (my $i = 0; $i<@count; $i++) {
679             $count[$i] > $max_growth or next;
680             $max_index = $i;
681             $max_growth = $count[$i];
682             };
683              
684             return $index->[$max_index];
685             };
686              
687             =head2 frequency_distribution_ref( \@index )
688              
689             =head2 frequency_distribution_ref( $n )
690              
691             =head2 frequency_distribution_ref
692              
693             Return numbers of data point counts below each number in @index as hashref.
694              
695             If a number is given instead of arrayref, @index is created
696             by dividing [min, max] into $n intervals.
697              
698             If no parameters are given, return previous result, if any.
699              
700             =cut
701              
702             sub frequency_distribution_ref {
703 3     3 1 628 my $self = shift;
704 3         6 my $index = shift;
705              
706 3 100       11 return unless $self->count;
707             # ah, compatibility - return last value
708             return $self->{cache}{frequency_distribution_ref}
709 2 50       6 unless defined $index;
710             # make index if number given
711 2 100       5 if (!ref $index) {
712 1 50       8 croak __PACKAGE__.": frequency_distribution_ref(): ".
713             "argument must be array, of number > 2, not $index"
714             unless $index > 2;
715 1         5 my $min = $self->_lower($self->min);
716 1         4 my $max = $self->_upper($self->max);
717 1         3 my $step = ($max - $min) / $index;
718 1         2 $index = [ map { $min + $_ * $step } 1..$index ];
  5         11  
719             };
720              
721 2         8 @$index = (-$INF, sort { $a <=> $b } @$index);
  12         25  
722              
723 2         3 my @count;
724 2         10 for (my $i = 0; $i<@$index-1; $i++) {
725 9         26 push @count, $self->_count( $index->[$i], $index->[$i+1] );
726             };
727 2         4 shift @$index; # remove -inf
728              
729 2         3 my %hash;
730 2         22 @hash{@$index} = @count;
731 2         5 $self->{cache}{frequency_distribution_ref} = \%hash;
732 2         9 return \%hash;
733             };
734              
735             =head1 Specific methods
736              
737             The folowing methods only apply to this module, or are experimental.
738              
739             =cut
740              
741             =head2 bucket_width
742              
743             Get bin width (relative to center of bin). Percentiles are off
744             by no more than half of this. DEPRECATED.
745              
746             =cut
747              
748             sub bucket_width {
749 5     5 1 9 my $self = shift;
750 5         29 return $self->{base} - 1;
751             };
752              
753             =head2 log_base
754              
755             Get upper/lower bound ratio for logarithmic bins.
756             This represents relative precision of sample.
757              
758             =head2 linear_width
759              
760             Get width of linear buckets.
761             This represents absolute precision of sample.
762              
763             =head2 linear_threshold
764              
765             Get absolute value threshold below which interpolation is switched to linear.
766              
767             =cut
768              
769             sub log_base {
770 3     3 1 7 my $self = shift;
771 3         14 return $self->{base};
772             };
773              
774             sub linear_width {
775 3     3 1 5 my $self = shift;
776 3         21 return $self->{linear_width};
777             };
778              
779             sub linear_threshold {
780 8     8 1 30 my $self = shift;
781 8         44 return $self->{linear_thresh};
782             };
783              
784             =head2 add_data_hash ( { value => weight, ... } )
785              
786             Add values with counts/weights.
787             This can be used to import data from other
788             Statistics::Descriptive::LogScale object.
789              
790             Returns self, so that methods can be chained.
791              
792             Negative counts are allowed and treated as "forgetting" data.
793             If a bin count goes below zero, such bin is simply discarded.
794             Minus infinity weight is allowed and has the same effect.
795             Data is guaranteed to remain consistent.
796              
797             If incorrect data is given (i.e. non-numeric, undef, or +infinity),
798             an exception is thrown and nothing changes.
799              
800             B Cache may be reset, even if no data was actually inserted.
801              
802             B It is possible to add infinite values to data pool.
803             The module will try and calculate whatever can still be calculated.
804             However, no portable way of serializing such values is done yet.
805              
806             =cut
807              
808             sub add_data_hash {
809 19     19 1 49 my $self = shift;
810 19         36 my $hash = shift;
811              
812             # check incoming data for being numeric, and no +inf values
813 19         30 eval {
814 22     22   56346 use warnings FATAL => qw(numeric);
  22         57  
  22         42584  
815 19         99 while (my ($k, $v) = each %$hash) {
816 289 100 33     2487 $k == 0+$k and $v == 0+$v and $v < $INF
      66        
817             or die "Infinite count for $k\n";
818             }
819             };
820 19 100       290 croak __PACKAGE__.": add_data_hash failed: $@"
821             if $@;
822              
823 17         29 delete $self->{cache};
824              
825             # update our counters
826 17         82 foreach (keys %$hash) {
827 286 50       630 next unless $hash->{$_};
828 286         555 my $key = $self->_round($_);
829              
830             # Insert data. Make sure -Inf doesn't corrupt our counter.
831 286   100     1381 my $newcount = ($self->{data}{$key} || 0) + $hash->{$_};
832 286 100       530 if ($newcount > 0) {
833             # normal insert
834 274         781 $self->{data}{$key} = $newcount;
835 274         565 $self->{count} += $hash->{$_};
836             } else {
837             # We're "forgetting" data, AND the bin got empty
838 12   100     58 $self->{count} -= delete $self->{data}{$key} || 0;
839             };
840             };
841 17         49 $self;
842             };
843              
844             =head2 get_data_hash( %options )
845              
846             Return distribution hashref {value => number of occurances}.
847              
848             This is inverse of add_data_hash.
849              
850             Options may include:
851              
852             =over
853              
854             =item * min - ignore values below this. (See find_boundaries)
855              
856             =item * max - ignore values above this. (See find_boundaries)
857              
858             =item * ltrim - ignore this % of values on lower end. (See find_boundaries)
859              
860             =item * rtrim - ignore this % of values on upper end. (See find_boundaries)
861              
862             =item * noise_thresh - strip bins with count below this.
863              
864             =back
865              
866             =cut
867              
868             sub get_data_hash {
869 63     63 1 13235 my ($self, %opt) = @_;
870              
871             # shallow copy of data if no options given
872 63 100       196 return {%{ $self->{data} }} unless %opt;
  61         820  
873              
874 2         30 my ($min, $max) = $self->find_boundaries( %opt );
875 2   100     15 my $noize = $opt{noize_thresh} || 0;
876              
877 2         4 my $data = $self->{data};
878 2         4 my %hash;
879 2         11 foreach (keys %$data ) {
880 13 100       42 $_ < $min and next;
881 8 50       17 $_ > $max and next;
882 8 100       20 $data->{$_} < $noize and next;
883 6         14 $hash{$_} = $data->{$_};
884             };
885              
886 2         11 return \%hash;
887             };
888              
889             =head2 TO_JSON()
890              
891             Return enough data to recreate the whole object as an unblessed hashref.
892              
893             This routine conforms with C, hence the name.
894             Can be called as
895              
896             my $str = JSON::XS->new->allow_blessed->convert_blessed->encode( $this );
897              
898             B This module DOES NOT require JSON::XS or serialize to JSON.
899             It just deals with data.
900             Use C, C, C or any serializer of choice.
901              
902             my $raw_data = $stat->TO_JSON;
903             Statistics::Descriptive::LogScale->new( %$raw_data );
904              
905             Would generate an exact copy of C<$stat> object
906             (provided it's S::D::L and not a subclass).
907              
908             =head2 clone( [ %options ] )
909              
910             Copy constructor - returns copy of an existing object.
911             Cache is not preserved.
912              
913             Constructor options may be given to override existing data. See new().
914              
915             Trim options may be given to get partial data. See get_data_hash().
916              
917             =cut
918              
919             sub clone {
920 7     7 1 3397 my ($self, %opt) = @_;
921              
922 7         18 my $raw = $self->TO_JSON;
923 7 100       23 if (%opt) {
924             $raw->{data} = $self->get_data_hash( %opt )
925 4 100       18 unless exists $opt{data};
926             exists $opt{$_} and $raw->{$_} = $opt{$_}
927 4   66     51 for @new_keys;
928             };
929              
930 7         35 return (ref $self)->new( %$raw );
931             };
932              
933             sub TO_JSON {
934 31     31 1 152 my $self = shift;
935             # UGLY HACK Increase linear_thresh by a factor of base ** 1/10
936             # so that it's rounded down to present value
937             return {
938             CLASS => ref $self,
939             VERSION => $VERSION,
940             base => $self->{base},
941             linear_width => $self->{linear_width},
942 31         150 linear_thresh => $self->{linear_thresh} * ($self->{base}+9)/10,
943             data => $self->get_data_hash,
944             };
945             };
946              
947             =head2 scale_sample( $scale )
948              
949             Multiply all bins' counts by given value. This can be used to adjust
950             significance of previous data before adding new data (e.g. gradually
951             "forgetting" past data in a long-running application).
952              
953             =cut
954              
955             sub scale_sample {
956 1     1 1 8 my $self = shift;
957 1         2 my $factor = shift;
958 1 50       4 $factor > 0 or croak (__PACKAGE__.": scale_sample: factor must be positive");
959              
960 1         3 delete $self->{cache};
961 1         2 foreach (@{ $self->_sort }) {
  1         9  
962 100         175 $self->{data}{$_} *= $factor;
963             };
964 1         7 $self->{count} *= $factor;
965 1         3 return $self;
966             };
967              
968             =head2 mean_of( $code, [$min, $max] )
969              
970             Return expectation of $code over sample within given range.
971              
972             $code is expected to be a pure function (i.e. depending only on its input
973             value, and having no side effects).
974              
975             The underlying integration mechanism only calculates $code once per bin,
976             so $code should be stable as in not vary wildly over small intervals.
977              
978             =cut
979              
980             sub mean_of {
981 24     24 1 55 my $self = shift;
982 24         40 my ($code, $min, $max) = @_;
983              
984 24     429   108 my $weight = $self->sum_of( sub {1}, $min, $max );
  429         861  
985 24 100       114 return unless $weight;
986 23         52 return $self->sum_of($code, $min, $max) / $weight;
987             };
988              
989             =head1 Experimental methods
990              
991             These methods may be subject to change in the future, or stay, if they
992             are good.
993              
994             =head2 sum_of ( $code, [ $min, $max ] )
995              
996             Integrate arbitrary function over the sample within the [ $min, $max ] interval.
997             Default values for both limits are infinities of appropriate sign.
998              
999             Values in the edge bins are cut using interpolation if needed.
1000              
1001             NOTE: sum_of(sub{1}, $a, $b) would return rough nubmer of data points
1002             between $a and $b.
1003              
1004             EXPERIMENTAL. The method name may change in the future.
1005              
1006             =cut
1007              
1008             sub sum_of {
1009 145     145 1 764 my $self = shift;
1010 145         292 my ($code, $realmin, $realmax) = @_;
1011              
1012             # Just app up stuff
1013 145 100 66     546 if (!defined $realmin and !defined $realmax) {
1014 122         165 my $sum = 0;
1015 122         166 while (my ($val, $count) = each %{ $self->{data} }) {
  63224         168850  
1016 63102         100217 $sum += $count * $code->( $val );
1017             };
1018 122         691 return $sum;
1019             };
1020              
1021 23 100       52 $realmin = -$INF unless defined $realmin;
1022 23 100       50 $realmax = $INF unless defined $realmax;
1023 23 50       53 return 0 if( $realmin >= $realmax );
1024              
1025             # correct limits. $min, $max are indices; $left, $right are limits
1026 23         46 my $min = $self->_round($realmin);
1027 23         49 my $max = $self->_round($realmax);
1028 23         52 my $left = $self->_lower($realmin);
1029 23         51 my $right = $self->_upper($realmax);
1030              
1031             # find first bin that's above $left
1032 23         52 my $keys = $self->_sort;
1033 23         52 my $i = _bin_search_ge($keys, $left);
1034              
1035             # warn "sum_of [$min, $max]";
1036             # add up bins
1037 23         31 my $sum = 0;
1038 23         51 for (; $i < @$keys; $i++) {
1039 284         855 my $val = $keys->[$i];
1040 284 100       510 last if $val > $right;
1041 264         569 $sum += $self->{data}{$val} * $code->( $val );
1042             };
1043              
1044             # cut edges: the hard part
1045             # min and max are now used as indices
1046             # if min or max hits 0, we cut it in half (i.e. into equal 0+ and 0-)
1047             # warn "Add up, sum_of = $sum";
1048 23 100       134 if ($self->{data}{$max}) {
1049 20         45 my $width = $self->_upper($max) - $self->_lower($max);
1050 20 100       61 my $part = $width
1051             ? ($self->_upper($max) - $realmax) / $width
1052             : 0.5;
1053 20         99 $sum -= $self->{data}{$max} * $code->($max) * $part;
1054             };
1055             # warn "Cut R, sum_of = $sum";
1056 23 100       113 if ($self->{data}{$min}) {
1057 20         40 my $width = $self->_upper($min) - $self->_lower($min);
1058 20 100       56 my $part = $width
1059             ? ($realmin - $self->_lower($min)) / $width
1060             : 0.5;
1061 20         71 $sum -= $self->{data}{$min} * $code->($min) * $part;
1062             };
1063             # warn "Cut L, sum_of = $sum";
1064              
1065 23         129 return $sum;
1066             }; # end sum_of
1067              
1068             =head2 histogram ( %options )
1069              
1070             Returns array of form [ [ count0_1, x0, x1 ], [count1_2, x1, x2 ], ... ]
1071             where countX_Y is number of data points between X and Y.
1072              
1073             Options may include:
1074              
1075             =over
1076              
1077             =item * count (+) - number of intervals to divide sample into.
1078              
1079             =item * index (+) - interval borders as array. Will be sorted before processing.
1080              
1081             =item * min - ignore values below this. Default = $self->min - epsilon.
1082              
1083             =item * max - ignore values above this. Default = $self->max + epsilon.
1084              
1085             =item * ltrim - ignore this % of values on lower end.
1086              
1087             =item * rtrim - ignore this % of values on upper end.
1088              
1089             =item * normalize_to - adjust counts so that max number becomes nnn.
1090             This may be useful if one intends to draw pictures.
1091              
1092             =back
1093              
1094             Either count or index must be present.
1095              
1096             NOTE: this is equivalent to frequency_distribution_ref but better suited
1097             for omitting sample tails and outputting pretty pictures.
1098              
1099             =cut
1100              
1101             sub histogram {
1102 5     5 1 1977 my ($self, %opt) = @_;
1103              
1104 5 100       13 return unless $self->count;
1105 4         14 my ($min, $max) = $self->find_boundaries( %opt );
1106             # build/check index
1107 4 100       6 my @index = @{ $opt{index} || [] };
  4         22  
1108 4 100       12 if (!@index) {
1109 3         6 my $n = $opt{count};
1110 3 50       8 $n > 0 or croak (__PACKAGE__.": histogram: insufficient options (count < 1 )");
1111 3         33 my $step = ($max - $min) / $n;
1112 3         12 for (my $x = $min; $x <= $max; $x += $step) {
1113 16         38 push @index, $x;
1114             };
1115             } else {
1116             # sort & uniq raw index
1117 1         3 my %known;
1118 1         2 @index = grep { !$known{$_}++ } @index;
  5         17  
1119 1         4 @index = sort { $a <=> $b } @index;
  8         11  
1120 1 50       5 @index > 1 or croak (__PACKAGE__.": histogram: insufficient options (index < 2)");
1121             };
1122              
1123             # finally: estimated counts between indices!
1124 4         6 my @ret;
1125 4         14 for ( my $i = 0; $i<@index-1; $i++) {
1126 17         41 my $count = $self->_count( $index[$i], $index[$i+1] );
1127 17         78 push @ret, [ $count, $index[$i], $index[$i+1] ];
1128             };
1129              
1130             # if normalize - find maximum & divide by it
1131 4 100       11 if (my $norm = $opt{normalize_to}) {
1132 1         3 my $max = 0;
1133             $max < $_->[0] and $max = $_->[0]
1134 1   66     9 for @ret;
1135 1         2 $norm /= $max;
1136 1         5 $_->[0] *= $norm for @ret;
1137             };
1138              
1139 4         18 return \@ret;
1140             };
1141              
1142             =head2 find_boundaries( %opt )
1143              
1144             Return ($min, $max) of part of sample denoted by options.
1145              
1146             Options may include:
1147              
1148             =over
1149              
1150             =item * min - ignore values below this. default = min() - epsilon.
1151              
1152             =item * max - ignore values above this. default = max() + epsilon.
1153              
1154             =item * ltrim - ignore this % of values on lower end.
1155              
1156             =item * rtrim - ignore this % of values on upper end.
1157              
1158             =back
1159              
1160             If no options are given, the whole sample is guaranteed to reside between
1161             returned values.
1162              
1163             =cut
1164              
1165             sub find_boundaries {
1166 9     9 1 1187 my $self = shift;
1167 9         26 my %opt = @_;
1168              
1169 9 100       33 return unless $self->count;
1170              
1171             # preprocess boundaries
1172 8 100       39 my $min = defined $opt{min} ? $opt{min} : $self->_lower( $self->min );
1173 8 100       39 my $max = defined $opt{max} ? $opt{max} : $self->_upper( $self->max );
1174              
1175 8 100       33 if ($opt{ltrim}) {
1176 1         5 my $newmin = $self->percentile( $opt{ltrim} );
1177 1 50 33     7 defined $newmin and $newmin > $min and $min = $newmin;
1178             };
1179 8 100       23 if ($opt{utrim}) {
1180 1         3 my $newmax = $self->percentile( 100-$opt{utrim} );
1181 1 50 33     14 defined $newmax and $newmax < $max and $max = $newmax;
1182             };
1183              
1184 8         30 return ($min, $max);
1185             };
1186              
1187             ################################################################
1188             # No more public methods please
1189              
1190             # MEMOIZE
1191             # We'll keep methods' returned values under {cache}.
1192             # All setters destroy said cache altogether.
1193             # PLEASE replace this with a ready-made module if there's one.
1194              
1195             # Sorry for this black magic, but it's too hard to write //= in EVERY method
1196             # Long story short
1197             # The next sub replaces $self->foo() with
1198             # sub { $self->{cache}{foo} //= $self->originnal_foo }
1199             # All setter methods are EXPECTED to destroy {cache} altogether.
1200              
1201             # NOTE if you plan subclassing the method, re-memoize methods you change.
1202             sub _memoize_method {
1203 242     242   438 my ($class, $name, $arg) = @_;
1204              
1205 242         1041 my $orig_code = $class->can($name);
1206 242 50       646 die "Error in memoizer section ($name)"
1207             unless ref $orig_code eq 'CODE';
1208              
1209             # begin long conditional
1210             my $cached_code = !$arg
1211             ? sub {
1212 203 100   203   5906 if (!exists $_[0]->{cache}{$name}) {
1213 100         277 $_[0]->{cache}{$name} = $orig_code->($_[0]);
1214             };
1215 203         859 return $_[0]->{cache}{$name};
1216             }
1217             : sub {
1218 169     169   1657 my $self = shift;
1219 169         236 my $arg = shift;
1220 169 100       409 $arg = '' unless defined $arg;
1221              
1222 169 100       593 if (!exists $self->{cache}{"$name:$arg"}) {
1223 100         239 $self->{cache}{"$name:$arg"} = $orig_code->($self, $arg);
1224             };
1225 168         798 return $self->{cache}{"$name:$arg"};
1226 242 100       994 };
1227             # conditional ends here
1228              
1229 22     22   133 no strict 'refs'; ## no critic
  22         38  
  22         760  
1230 22     22   118 no warnings 'redefine'; ## no critic
  22         47  
  22         2694  
1231 242         312 *{$class."::".$name} = $cached_code;
  242         997  
1232             }; # end of _memoize_method
1233              
1234             # Memoize all the methods w/o arguments
1235             foreach ( qw(sum sumsq mean min max mode) ) {
1236             __PACKAGE__->_memoize_method($_);
1237             };
1238              
1239             # Memoize methods with 1 argument
1240             foreach (qw(quantile central_moment std_moment standard_deviation variance)) {
1241             __PACKAGE__->_memoize_method($_, 1);
1242             };
1243              
1244             # add shorter alias of standard_deviation (this must happen AFTER memoization)
1245             {
1246 22     22   329 no warnings 'once'; ## no critic
  22         38  
  22         9137  
1247             *std_dev = \&standard_deviation;
1248             };
1249              
1250             # Get number of values below $x
1251             # Like sum_of(sub{1}, undef, $x), but faster.
1252             # Used by cdf()
1253             sub _count {
1254 105     105   145 my $self = shift;
1255 105 100       259 @_>1 and return $self->_count($_[1]) - $self->_count($_[0]);
1256 74         88 my $x = shift;
1257              
1258 74         134 my $upper = $self->_upper($x);
1259 74         156 my $i = _bin_search_gt( $self->_sort, $upper );
1260 74 100       180 !$i-- and return 0;
1261 65         140 my $count = $self->_probability->[$i];
1262              
1263             # interpolate
1264 65         130 my $bin = $self->_round( $x );
1265 65 100       325 if (my $val = $self->{data}{$bin}) {
1266 34         57 my $width = ($upper - $bin) * 2;
1267 34 100       67 my $part = $width ? ( ($upper - $x) / $width) : 1/2;
1268 34         56 $count -= $part * $val;
1269             };
1270 65         210 return $count;
1271             };
1272              
1273             # BINARY SEARCH
1274             # Not a method, just a function
1275             # Takes sorted \@array and a $num
1276             # Return lowest $i such that $array[$i] >= $num
1277             # Return (scalar @array) if no such $i exists
1278             sub _bin_search_ge {
1279 146     146   370 my ($array, $x) = @_;
1280              
1281 146 100 100     796 return 0 unless @$array and $array->[0] < $x;
1282 125         160 my $l = 0;
1283 125         183 my $r = @$array;
1284 125         308 while ($l+1 < $r) {
1285 403         632 my $m = int( ($l + $r) /2);
1286 403 100       1252 $array->[$m] < $x ? $l = $m : $r = $m;
1287             };
1288 125         347 return $l+1;
1289             };
1290             sub _bin_search_gt {
1291 74     74   103 my ($array, $x) = @_;
1292 74         133 my $i = _bin_search_ge(@_);
1293 74 100 100     326 $i++ if defined $array->[$i] and $array->[$i] == $x;
1294 74         115 return $i;
1295             };
1296              
1297             # THE CORE
1298             # Here come the number=>bin functions
1299             # round() generates bin center
1300             # upper() and lower() are respective boundaries.
1301             # Here's the algorithm:
1302             # 1) determine whether bin is linear or logarithmic
1303             # 2) for linear bins, return bin# * bucket_width (==2*absolute error)
1304             # add/subtract 0.5 to get edges.
1305             # 3) for logarithmic bins, return base ** bin# with appropriate sign
1306             # multiply by precalculated constant to get edges
1307             # note that +0.5 doesn't work here, since sqrt(a*b) != (a+b)/2
1308             # This part is fragile and can be written better
1309              
1310             # center of bin containing x
1311             sub _round {
1312 90888     90888   109040 my $self = shift;
1313 90888         109305 my $x = shift;
1314              
1315 22     22   180 use warnings FATAL => qw(numeric uninitialized);
  22         37  
  22         3570  
1316              
1317 90888 100       189372 if (abs($x) <= $self->{linear_thresh}) {
1318             return $self->{linear_width}
1319 1761   100     6796 && $self->{linear_width} * floor( $x / $self->{linear_width} + 0.5 );
1320             };
1321 89124         225567 my $i = floor (((log abs $x) - $self->{logfloor})/ $self->{logbase});
1322 89124         152199 my $value = $self->{base} ** $i;
1323 89124 100       415819 return $x < 0 ? -$value : $value;
1324             };
1325              
1326             # lower, upper limits of bin containing x
1327             sub _lower {
1328 882     882   58216 my $self = shift;
1329 882         1151 my $x = shift;
1330              
1331 22     22   116 use warnings FATAL => qw(numeric uninitialized);
  22         33  
  22         8391  
1332              
1333 882 100       2365 if (abs($x) <= $self->{linear_thresh}) {
1334             return $self->{linear_width}
1335 183   66     1419 && $self->{linear_width} * (floor( $x / $self->{linear_width} + 0.5) - 0.5);
1336             };
1337 699         2309 my $i = floor (((log abs $x) - $self->{logfloor} )/ $self->{logbase});
1338 699 100       1571 if ($x > 0) {
1339 310         1300 return $self->{floor} * $self->{base}**($i);
1340             } else {
1341 389         1627 return -$self->{floor} * $self->{base}**($i+1);
1342             };
1343             };
1344              
1345             sub _upper {
1346 462     462   4814 return -$_[0]->_lower(-$_[1]);
1347             };
1348              
1349             # build bin index
1350             sub _sort {
1351 189     189   260 my $self = shift;
1352             return $self->{cache}{sorted}
1353 189   100     863 ||= [ sort { $a <=> $b } keys %{ $self->{data} } ];
  1232         1909  
  31         262  
1354             };
1355              
1356             # build cumulative bin counts index
1357             sub _probability {
1358 106     106   134 my $self = shift;
1359 106   66     425 return $self->{cache}{probability} ||= do {
1360 19         35 my @array;
1361 19         36 my $sum = 0;
1362 19         33 foreach (@{ $self->_sort }) {
  19         56  
1363 160         255 $sum += $self->{data}{$_};
1364 160         299 push @array, $sum;
1365             };
1366 19         129 \@array;
1367             };
1368             };
1369              
1370             =head1 AUTHOR
1371              
1372             Konstantin S. Uvarin, C<< >>
1373              
1374             =head1 BUGS
1375              
1376             The module is currently under development. There may be bugs.
1377              
1378             C only works for discrete distributions, and simply returns
1379             the first bin with largest bin count.
1380             A better algorithm is wanted.
1381              
1382             C should have been made a private method.
1383             Its signature and/or name may change in the future.
1384              
1385             See the TODO file in the distribution package.
1386              
1387             Please feel free to post bugs and/or feature requests to github:
1388             L
1389              
1390             Alternatively, you can use CPAN RT
1391             via e-mail C,
1392             or through the web interface at
1393             L.
1394              
1395             Your contribution is appreciated.
1396              
1397             =head1 SUPPORT
1398              
1399             You can find documentation for this module with the perldoc command.
1400              
1401             perldoc Statistics::Descriptive::LogScale
1402              
1403             You can also look for information at:
1404              
1405             =over 4
1406              
1407             =item * GitHub:
1408              
1409             L
1410              
1411             =item * RT: CPAN's request tracker (report bugs here)
1412              
1413             L
1414              
1415             =item * AnnoCPAN: Annotated CPAN documentation
1416              
1417             L
1418              
1419             =item * CPAN Ratings
1420              
1421             L
1422              
1423             =item * Search CPAN
1424              
1425             L
1426              
1427             =back
1428              
1429             =head1 ACKNOWLEDGEMENTS
1430              
1431             This module was inspired by a talk that Andrew Aksyonoff, author of
1432             L,
1433             has given at HighLoad++ conference in Moscow, 2012.
1434              
1435             L was and is used as reference when in doubt.
1436             Several code snippets were shamelessly stolen from there.
1437              
1438             C and C parameter names were suggested by
1439             CountZero from http://perlmonks.org
1440              
1441             =head1 LICENSE AND COPYRIGHT
1442              
1443             Copyright 2013-2015 Konstantin S. Uvarin.
1444              
1445             This program is free software; you can redistribute it and/or modify it
1446             under the terms of either: the GNU General Public License as published
1447             by the Free Software Foundation; or the Artistic License.
1448              
1449             See http://dev.perl.org/licenses/ for more information.
1450              
1451             =cut
1452              
1453             1; # End of Statistics::Descriptive::LogScale