File Coverage

blib/lib/Statistics/Running.pm
Criterion Covered Total %
statement 248 297 83.5
branch 35 52 67.3
condition 10 17 58.8
subroutine 46 52 88.4
pod 24 32 75.0
total 363 450 80.6


line stmt bran cond sub pod time code
1             package Statistics::Running;
2              
3 3     3   69408 use 5.006;
  3         20  
4 3     3   15 use strict;
  3         7  
  3         65  
5 3     3   33 use warnings;
  3         6  
  3         106  
6              
7 3     3   1686 use Data::Dumper;
  3         19467  
  3         286  
8              
9             our $VERSION = '0.12';
10              
11             # overload these operators to have special meaning when
12             # operand(s) are Statistics::Running:
13             use overload
14             # add two stats object and adjust summed mean,stdev etc.
15 3         37 '+' => \&concatenate,
16             # check if two stats objects are same wrt mean,stdev,N BUT NOT histogram
17             '==' => \&equals,
18             # convert a stats object into a string, e.g. print $obj."\n";
19             '""' => \&stringify,
20 3     3   3215 ;
  3         2654  
21              
22 3     3   1632 use Try::Tiny;
  3         5797  
  3         153  
23 3     3   1278 use Statistics::Histogram;
  3         84955  
  3         378  
24              
25             # this is for all numerical equality comparisons
26 3     3   30 use constant SMALL_NUMBER_FOR_EQUALITY => 1E-10;
  3         6  
  3         10126  
27              
28             # creates an obj. There are no input params
29             sub new {
30 16     16 1 2776 my $class = $_[0];
31              
32 16   100     88 my $parent = ( caller(1) )[3] || "N/A";
33 16         86 my $whoami = ( caller(0) )[3];
34              
35 16         139 my $self = {
36             # these are internal variables to store mean etc. or used to calculate Kurtosis
37             'M1' => 0.0,
38             'M2' => 0.0,
39             'M3' => 0.0,
40             'M4' => 0.0,
41             'MIN' => 0.0,
42             'MAX' => 0.0,
43             'N' => 0, # number of data items inserted
44             # this histogram is updated each time a new data point is pushed in the object
45             # it just holds the number of items in each bin, so it is not too expensive.
46             # with this we get an idea of the Probability Distribution of the pushed data.
47             # Which may or may not be useful to users.
48             # Should you want to avoid this then use Statistics::Running::Tiny
49             'histo' => {
50             'num-bins' => -1,
51             'bins' => {
52             # b: [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]
53             'b' => [], # length is 'num-bins'+1
54             # c: contains the counts, its size is equal to the number of bins
55             # the first cell contains counts in the interval [histo-left-boundary, bin1_right_boundary]
56             # the last cell contains counts of [binN-1_right_boundary, histo-right-boundary]
57             'c' => [], # length 'num-bins'
58             },
59             # cached stringified histogram, it is re-calculated only if data points added
60             # and asked to print histogram
61             'stringified' => undef,
62             # when asked to stringify a hist we actually use a cached string
63             # which needs to be recalculated whenever data is added or hist re-created
64             'needs-recalculate' => 1,
65             },
66             };
67 16         33 bless($self, $class);
68 16         49 $self->clear();
69 16         48 return $self
70             }
71             # return the mean of the data entered so far
72 4     4 1 32 sub mean { return $_[0]->{'M1'} }
73             # returns the histogram bins (can be empty) in our internal format
74 2     2 1 19 sub histogram { return $_[0]->{'histo'} }
75             # if no params, it returns our bins as a hash
76             # otherwise it imports input bins in the form of a hash
77             # and before that it erases previous histogram and forms it according to input, e.g.
78             # sets bin-widths and numbins etc.
79             sub histogram_bins_hash {
80 9     9 0 23 my $self = $_[0];
81 9         13 my $bins = $_[1];
82 9 100       18 if( ! defined($bins) ){
83             # export to a hash
84 6         14 return $self->_bins2hash()
85             }
86             # import from a hash
87 3         9 $self->_hash2bins($bins);
88             }
89             # if no params, it returns our bins as a hash of the form returned by Statistics::Descriptive::frequency_distribution()
90             # otherwise it imports input bins in the form of a hash in the form returned by Statistics::Descriptive::frequency_distribution()
91             sub histogram_bins_stathash {
92 0     0 0 0 my $self = $_[0];
93 0         0 my $bi = $_[1];
94 0 0       0 if( ! defined($bi) ){
95             # export to a hash
96 0         0 return $self->_bins2stathash()
97             }
98             # import from a hash
99 0         0 $self->_stathash2bins($bi);
100             }
101             # return a string showing this histogram by calling Statistics::Histogram::print_histogram()
102             # we first convert our hist to stathash format
103             sub histogram_stringify {
104 3     3 0 9 my ($self, @opts) = @_;
105 3 50       16 if( $self->{'histo'}->{'needs-recalculate'} == 1 ){ $self->_histogram_recalculate(@opts) }
  3         11  
106 3         56 return $self->{'histo'}->{'stringified'}
107             }
108             # we need to recalculate each time a new data is added.
109             # but we do recalculate whenever it is needed, i.e. when we asked to print histogram
110             sub _histogram_recalculate {
111 3     3   7 my ($self, @stringify_opts) = @_;
112 3         5 my $histstr = "";
113 3 100       8 if( $self->{'histo'}->{'num-bins'} > 0 ){
114             Try::Tiny::try {
115             $histstr = Statistics::Histogram::print_histogram(
116             'hist' => $self->_bins2stathash(),
117 2     2   99 'x_min' => $self->{'histo'}->{'bins'}->{'b'}->[0],
118             use_linear_axes => 1,
119             @stringify_opts
120             )
121             } Try::Tiny::catch {
122 0     0   0 print STDERR "_histogram_recalculate() : error caught trying to stringify: $_\n";
123 0         0 $histstr = "";
124 2         17 };
125             }
126 3         462 $self->{'histo'}->{'stringified'} = $histstr;
127 3         15 $self->{'histo'}->{'needs-recalculate'} = 0;
128             }
129             # disable histogram logging, all existing histogram data is erased
130             sub histogram_disable {
131 0     0 1 0 my $self = $_[0];
132              
133 0         0 $self->{'histo'}->{'num-bins'} = -1;
134 0         0 $self->{'histo'}->{'bins'}->{'b'} = [];
135 0         0 $self->{'histo'}->{'bins'}->{'c'} = [];
136 0         0 $self->{'histo'}->{'needs-recalculate'} = 1;
137             }
138             # returns the count in bin specified as 1st input param
139 0     0 1 0 sub histogram_count { return $_[0]->{'histo'}->{'c'}->[$_[1]] }
140              
141             # enables histogram logging
142             # it expects some parameters for creating the histogram in various forms,
143             # e.g. by specifying the number of bins, bin-width and left boundary or
144             # by specifying a HASH or ARRAY of bin specifications for non-uniform bin
145             # sizes. HASH must be of the form 'FROM:TO'->counts
146             # ARRAY of bin boundaries of the form
147             # [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]
148             # the number of bins is 1 less than the length of this array
149             sub histogram_enable {
150 7     7 1 35 my $self = $_[0];
151 7         8 my $params = $_[1]; # $_[1] // {} does not work for perl<5.10, ? : requests $_[1] twice, so Cish if( ! defined below...
152              
153 7 50       14 if( ! defined($params) ){ $params = {} }
  0         0  
154              
155 7         9 my ($m1, $m2, $m3);
156 7 50 33     41 if( defined($m1=$params->{'bins'}) ){
    50 33        
157 0         0 my $aref = ref($m1);
158 0 0       0 if( $aref eq 'ARRAY' ){
    0          
159             # an array of bin boundaries of the form
160             # [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]
161             # the number of bins is 1 less than the length of this array
162 0         0 my @mm = @$m1;
163 0         0 $self->{'histo'}->{'num-bins'} = scalar(@mm)-1;
164 0         0 $self->{'histo'}->{'bins'}->{'b'} = [@mm];
165 0         0 $self->{'histo'}->{'bins'}->{'c'} = (0) x $self->{'histo'}->{'num-bins'};
166             } elsif( $aref eq 'HASH' ){
167             # a hashref keyed on bin-intervals in the form FROM:TO->counts
168 0         0 $self->_hash2bins($m1);
169 0         0 } else { die "parameter 'bins' expects either a HASHREF keyed on bin-intervals in the form FROM:TO->counts (and counts can be non-zero if that is a previous histogram), or an ARRAYREF with bin boundaries of the form [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]. In this case the number of bins is 1 less than the length of the array." }
170             } elsif( defined($m1=$params->{'bin-width'})
171             && defined($m2=$params->{'num-bins'})
172             && defined($m3=$params->{'left-boundary'})
173             ){
174             # we re-create our own bins based on num-bins etc.
175 7         13 $self->_histogram_create_bins_from_spec($m1, $m2, $m3)
176             } else {
177             # no params! we need params
178 0         0 print STDERR "enable_histogram() : failed to enable histogram because no histogram specification was supplied. Try enable_histogram({bin-width=>1, nun-bins=>10, left-boundary=>-5});\n";
179             }
180 7         10 $self->{'histo'}->{'needs-recalculate'} = 1;
181             }
182             # set existing histogram to zero counts
183             sub histogram_reset {
184 18     18 1 25 my $self = $_[0];
185              
186             # no params, set all counts OF ALREADY EXISTING histogram to zero
187 18         31 my $m1 = $self->{'histo'}->{'bins'}->{'c'};
188 18         52 for(my $i=$self->{'histo'}->{'num-bins'};$i-->0;){ $m1->[$i] = 0 }
  0         0  
189 18         32 $self->{'histo'}->{'needs-recalculate'} = 1;
190             }
191             # push Data: a sample and process/update mean and all other stat measures
192             # also insert it in histogram
193             sub add {
194 756     756 1 1807 my $self = $_[0];
195 756         891 my $x = $_[1];
196              
197 756         993 my $aref = ref($x);
198              
199 756 100       1155 if( $aref eq '' ){
    50          
200             # a scalar input
201 753         963 my ($delta, $delta_n, $delta_n2, $term1);
202 753         1005 my $n1 = $self->{'N'};
203 753 100       1100 if( $n1 == 0 ){ $self->{'MIN'} = $self->{'MAX'} = $x }
  10         20  
204             else {
205 743 100       1252 if( $x < $self->{'MIN'} ){ $self->{'MIN'} = $x }
  13         18  
206 743 100       1183 if( $x > $self->{'MAX'} ){ $self->{'MAX'} = $x }
  155         210  
207             }
208 753         964 $self->{'N'} += 1; # increment sample size push in
209 753         1149 my $n0 = $self->{'N'};
210              
211 753         946 $delta = $x - $self->{'M1'};
212 753         988 $delta_n = $delta / $n0;
213 753         1167 $delta_n2 = $delta_n * $delta_n;
214 753         946 $term1 = $delta * $delta_n * $n1;
215 753         1005 $self->{'M1'} += $delta_n;
216             $self->{'M4'} += $term1 * $delta_n2 * ($n0*$n0 - 3*$n0 + 3)
217             + 6 * $delta_n2 * $self->{'M2'}
218 753         1754 - 4 * $delta_n * $self->{'M3'}
219             ;
220             $self->{'M3'} += $term1 * $delta_n * ($n0 - 2)
221 753         1170 - 3 * $delta_n * $self->{'M2'}
222             ;
223 753         1002 $self->{'M2'} += $term1;
224             # add data point to the internal histogram
225 753         1179 $self->_histogram_add($x);
226             } elsif( $aref eq 'ARRAY' ){
227             # an array input
228 3         8 foreach (@$x){ $self->add($_) }
  302         458  
229             } else {
230 0         0 die "add(): only ARRAY and SCALAR can be handled (input was type '$aref')."
231             }
232             }
233             # copies input(=src) Running obj into current/self overwriting our data, this is not a clone()!
234             sub copy_from {
235 1     1 1 5 my $self = $_[0];
236 1         2 my $src = $_[1];
237 1         3 $self->{'M1'} = $src->M1();
238 1         3 $self->{'M2'} = $src->M2();
239 1         3 $self->{'M3'} = $src->M3();
240 1         3 $self->{'M4'} = $src->M4();
241 1         16 $self->set_N($src->get_N());
242 1         4 $self->_histogram_copy_from($src);
243             }
244             # clones current obj into a new Running obj with same values
245             sub clone {
246 1     1 1 3 my $self = $_[0];
247 1         4 my $newO = Statistics::Running->new();
248 1         5 $newO->{'M1'} = $self->M1();
249 1         4 $newO->{'M2'} = $self->M2();
250 1         4 $newO->{'M3'} = $self->M3();
251 1         4 $newO->{'M4'} = $self->M4();
252 1         4 $newO->set_N($self->get_N());
253 1         3 return $newO
254             }
255             # clears all data entered/calculated including histogram
256             sub clear {
257 18     18 1 30 my $self = $_[0];
258 18         85 $self->{'M1'} = 0.0;
259 18         29 $self->{'M2'} = 0.0;
260 18         25 $self->{'M3'} = 0.0;
261 18         24 $self->{'M4'} = 0.0;
262 18         45 $self->{'N'} = 0;
263 18         40 $self->histogram_reset();
264             }
265 4     4 1 18 sub min { return $_[0]->{'MIN'} }
266 4     4 1 13 sub max { return $_[0]->{'MAX'} }
267             # get number of total elements entered so far
268 22     22 1 79 sub get_N { return $_[0]->{'N'} }
269             sub variance {
270 4     4 1 7 my $self = $_[0];
271 4         7 my $m = $self->{'N'};
272 4 50       13 if( $m == 1 ){ return 0 }
  0         0  
273 4         32 return $self->{'M2'}/($m-1.0)
274             }
275 4     4 1 13 sub standard_deviation { return sqrt($_[0]->variance()) }
276             sub skewness {
277 3     3 1 7 my $self = $_[0];
278 3         11 my $m = $self->{'M2'};
279 3 100       10 if( $m == 0 ){ return 0 }
  1         13  
280             return sqrt($self->{'N'})
281 2         15 * $self->{'M3'} / ($m ** 1.5)
282             ;
283             }
284             sub kurtosis {
285 4     4 1 10 my $self = $_[0];
286 4         10 my $m = $self->{'M2'};
287 4 100       13 if( $m == 0 ){ return 0 }
  2         9  
288             return $self->{'N'}
289 2         10 * $self->{'M4'}
290             / ($m * $m)
291             - 3.0
292             ;
293             }
294             # concatenates another Running obj with current
295             # AND returns a new Running obj with concatenated stats
296             # Current object is not modified.
297             sub concatenate {
298 4     4 1 23 my $self = $_[0]; # us
299 4         6 my $other = $_[1]; # another Running obj
300              
301 4         12 my $combined = Statistics::Running->new();
302              
303 4         10 my $selfN = $self->get_N();
304 4         30 my $otherN = $other->get_N();
305 4         24 my $selfM2 = $self->M2();
306 4         10 my $otherM2 = $other->M2();
307 4         15 my $selfM3 = $self->M3();
308 4         22 my $otherM3 = $other->M3();
309              
310 4         7 my $combN = $selfN + $otherN;
311 4         10 $combined->set_N($combN);
312            
313 4         10 my $delta = $other->M1() - $self->M1();
314 4         7 my $delta2 = $delta*$delta;
315 4         7 my $delta3 = $delta*$delta2;
316 4         7 my $delta4 = $delta2*$delta2;
317              
318 4         8 $combined->{'M1'} = ($selfN*$self->M1() + $otherN*$other->M1()) / $combN;
319              
320 4         11 $combined->{'M2'} = $selfM2 + $otherM2 +
321             $delta2 * $selfN * $otherN / $combN;
322            
323 4         15 $combined->{'M3'} = $selfM3 + $otherM3 +
324             $delta3 * $selfN * $otherN * ($selfN - $otherN)/($combN*$combN) +
325             3.0*$delta * ($selfN*$otherM2 - $otherN*$selfM2) / $combN
326             ;
327            
328 4         21 $combined->{'M4'} = $self->{'M4'} + $other->{'M4'}
329             + $delta4*$selfN*$otherN * ($selfN*$selfN - $selfN*$otherN + $otherN*$otherN) /
330             ($combN*$combN*$combN)
331             + 6.0*$delta2 * ($selfN*$selfN*$otherM2 + $otherN*$otherN*$selfM2)/($combN*$combN) +
332             4.0*$delta*($selfN*$otherM3 - $otherN*$selfM3) / $combN
333             ;
334              
335             # add the histograms only if structure matches:
336 4 100       11 if( $self->_equals_histograms_structure($other) ){
337 1         3 $combined->_histogram_copy_from($self);
338 1         5 $combined->_add_histograms($other);
339             }
340              
341 4         19 return $combined;
342             }
343             # appends another Running obj INTO current
344             # histogram data is appended only if histogram specs are the same
345             # current obj (self) IS MODIFIED
346             sub append {
347 0     0 1 0 my $self = $_[0]; # us
348 0         0 my $other = $_[1]; # another Running obj
349 0         0 $self->copy_from($self+$other);
350             }
351             # equality only wrt to stats BUT NOT histogram
352             sub equals {
353 4     4 1 19 my $self = $_[0]; # us
354 4         5 my $other = $_[1]; # another Running obj
355             return
356 4   66     9 $self->get_N() == $other->get_N() &&
357             $self->equals_statistics($other)
358             }
359             sub equals_statistics {
360 5     5 1 18 my $self = $_[0]; # us
361 5         8 my $other = $_[1]; # another Running obj
362             return
363 5   33     11 abs($self->M1()-$other->M1()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY &&
364             abs($self->M2()-$other->M2()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY &&
365             abs($self->M3()-$other->M3()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY &&
366             abs($self->M4()-$other->M4()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY
367             }
368             # checks if structure is same and then if bin contents (counts) are same
369             # returns 1 if equals
370             # returns 0 if either structure or counts are not the same
371             sub equals_histograms {
372 2     2 1 10 my $self = $_[0]; # us
373 2         3 my $other = $_[1]; # another Running obj
374              
375             # structure is not the same
376 2 50       5 if( $self->_equals_histograms_structure($other) == 0 ){ return 0 }
  0         0  
377              
378 2         4 my $selfC = $self->{'histo'}->{'bins'}->{'c'};
379 2         2 my $otherC = $other->{'histo'}->{'bins'}->{'c'};
380 2         3 my $i;
381 2         5 for($i=$self->{'histo'}->{'num-bins'};$i-->0;){
382 10 50       19 if( $selfC->[$i] != $otherC->[$i] ){ return 0 }
  0         0  
383             }
384 2         9 return 1 # equal in structure and counts
385             }
386             # adds counts of histograms to us from other
387             # returns 0 if structures do not match
388             # returns 1 if counts added OK
389             sub _add_histograms {
390 1     1   2 my $self = $_[0]; # us
391 1         1 my $other = $_[1]; # another Running obj
392              
393             # structure is not the same
394 1 50       7 if( $self->_equals_histograms_structure($other) == 0 ){ return 0 }
  0         0  
395              
396 1         2 my $selfC = $self->{'histo'}->{'bins'}->{'c'};
397 1         2 my $otherC = $other->{'histo'}->{'bins'}->{'c'};
398 1         1 my $i;
399 1         4 for($i=$self->{'histo'}->{'num-bins'};$i-->0;){
400 10         14 $selfC->[$i] += $otherC->[$i];
401             }
402 1         2 $self->{'histo'}->{'needs-recalculate'} = 1;
403 1         2 return 1 # counts added
404             }
405             # print object as a string, string concat/printing is overloaded on this method
406             sub stringify {
407 3     3 1 12 my $self = $_[0];
408 3         8 return "N: ".$self->get_N()
409             .", mean: ".$self->mean()
410             .", range: ".$self->min()." to ".$self->max()
411             .", standard deviation: ".$self->standard_deviation()
412             .", kurtosis: ".$self->kurtosis()
413             .", skewness: ".$self->skewness()
414             .", histogram:\n".$self->histogram_stringify()
415             }
416             # internal methods, no need for anyone to know or use externally
417 6     6 0 10 sub set_N { $_[0]->{'N'} = $_[1] }
418 28     28 0 77 sub M1 { return $_[0]->{'M1'} }
419 20     20 0 62 sub M2 { return $_[0]->{'M2'} }
420 20     20 0 70 sub M3 { return $_[0]->{'M3'} }
421 12     12 0 49 sub M4 { return $_[0]->{'M4'} }
422             # copy src's histogram to us, erasing previous data and histo-format
423             sub _histogram_copy_from {
424 2     2   4 my $self = $_[0];
425 2         4 my $src = $_[1]; # a src stats object whose histogram we are copying onto us
426 2         6 $self->histogram_bins_hash($src->histogram_bins_hash());
427             }
428             # given bin-width, num-bins and left-boundary create the bin arrays
429             sub _histogram_create_bins_from_spec {
430 7     7   12 my ($self, $bw, $nb, $lb) = @_;
431              
432 7         12 $self->{'histo'}->{'num-bins'} = $nb;
433 7         19 my @B = (0)x($nb+1);
434 7         10 my ($i);
435 7         7 my $v = $lb;
436 7         16 for($i=0;$i<=$nb;$i++){
437 63         66 $B[$i] = $v;
438 63         86 $v += $bw;
439             }
440 7         11 $self->{'histo'}->{'bins'}->{'b'} = \@B;
441 7         16 $self->{'histo'}->{'bins'}->{'c'} = [(0)x$nb];
442 7         15 $self->{'histo'}->{'needs-recalculate'} = 1;
443             }
444             # add a datapoint to the histogram, this is usually called only via the public add()
445             sub _histogram_add {
446 753     753   949 my $self = $_[0];
447 753         902 my $x = $_[1]; # value to add
448 753         920 my ($n, $i);
449 753 100       1352 if( ($n=$self->{'histo'}->{'num-bins'}) <= 0 ){ return }
  502         998  
450 251         296 my $B = $self->{'histo'}->{'bins'}->{'b'};
451 251         373 for($i=0;$i<$n;$i++){
452 1090 100 100     2581 if( ($x > $B->[$i]) && ($x <= $B->[$i+1]) ){
453 146         181 $self->{'histo'}->{'bins'}->{'c'}->[$i]++;
454 146         176 $self->{'histo'}->{'needs-recalculate'} = 1; # need to recalc stringify
455             return
456 146         259 }
457             }
458             }
459             # given the bins and bin counts arrays, return a hash in the natural form:
460             # from-bin:to-bin -> count
461             # see also _bins2stathash for returning a hash of the format specified in Statistics::Descriptive
462             sub _bins2hash {
463 6     6   8 my $self = $_[0];
464 6         8 my %ret = ();
465 6         12 my $B = $self->{'histo'}->{'bins'}->{'b'};
466 6         8 my $C = $self->{'histo'}->{'bins'}->{'c'};
467 6         8 my $i;
468 6         15 for($i=$self->{'histo'}->{'num-bins'};$i-->0;){
469 45         98 $ret{$B->[$i].":".$B->[$i+1]} = $C->[$i]
470             }
471 6         18 return \%ret
472             }
473             # given the bins and bin counts arrays, return a hash with keys
474             # to-bin -> count
475             # whereas count is the count of the bin specified by to-bin and its previous key of the hash
476             sub _bins2stathash {
477 2     2   4 my $self = $_[0];
478 2         4 my %ret = ();
479 2         4 my $B = $self->{'histo'}->{'bins'}->{'b'};
480 2         3 my $C = $self->{'histo'}->{'bins'}->{'c'};
481 2         2 my $i;
482 2         7 for($i=$self->{'histo'}->{'num-bins'}-1;$i-->0;){
483 18         40 $ret{$B->[$i+1]} = $C->[$i]
484             }
485 2         9 return \%ret
486             }
487             # given a hash with keys
488             # from-bin:to-bin -> count
489             # erase and re-create the bin and counts arrays of histo.
490             # for a way to import Statistics::Descriptive frequency_distribution hash check _stathash2bins()
491             sub _hash2bins {
492 3     3   5 my $self = $_[0];
493 3         4 my $H = $_[1];
494 3         5 my @B = ();
495 3         5 my @C = ();
496 3         10 my @K = keys %$H;
497 3         7 $self->{'histo'}->{'num-bins'} = scalar(@K);
498 3         5 my ($acount, $akey);
499              
500             my @X = map {
501 15         20 push(@B, $_->[1]); # left-bin (from)
502 15         22 push(@C, $H->{$_->[0]}); # counts
503 15         22 $_->[2]; # spit out the right-bin (to)
504             }
505 30         41 sort { $a->[1] <=> $b->[1] }
506 3         11 map { [ $_, split(/\:/, $_) ] }
  15         59  
507             @K
508             ;
509 3         9 push(@B, $X[-1]);
510 3         8 $self->{'histo'}->{'bins'}->{'b'} = \@B;
511 3         5 $self->{'histo'}->{'bins'}->{'c'} = \@C;
512 3         11 $self->{'histo'}->{'needs-recalculate'} = 1;
513             }
514             # given a hash with keys
515             # to-bin -> count
516             # erase and re-create the bin and counts arrays of histo.
517             # the hash is exactly what Statistics::Descriptive::frequency_distribution() returns
518             # there is only one problem: what is the left-boundary? we will set it to -infinity.
519             sub _stathash2bins {
520 0     0   0 my $self = $_[0];
521 0         0 my $H = $_[1]; # hashref: exactly what Statistics::Descriptive::frequency_distribution() returns
522 0         0 my @B = ();
523 0         0 my @C = ();
524 0         0 my @K = keys %$H;
525 0         0 $self->{'histo'}->{'num-bins'} = scalar(@K);
526 0         0 my ($acount, $akey);
527              
528 0         0 push(@B, -(~0 >> 1)); # -MAX_INT fuck you.
529 0         0 foreach my $k (sort { $a <=> $b } keys %$H){
  0         0  
530 0         0 push(@B, $k);
531 0         0 push(@C, $H->{$k});
532             }
533 0         0 $self->{'histo'}->{'bins'}->{'b'} = \@B;
534 0         0 $self->{'histo'}->{'bins'}->{'c'} = \@C;
535 0         0 $self->{'histo'}->{'needs-recalculate'} = 1;
536             }
537             # compares the structure of the histograms of us and another obj
538             # if histograms have same number of bins and same bin-specs (boundaries)
539             # then histograms are equal and returns 1
540             # if both histograms contain zero bins (not initialised) then also returns 1
541             # else, histogram structure differs and returns 0
542             sub _equals_histograms_structure {
543 7     7   16 my ($self, $other) = @_;
544              
545 7         11 my $NB1 = $self->{'histo'}->{'num-bins'};
546 7 100       18 if( $NB1 != $other->{'histo'}->{'num-bins'} ){ return 0 }
  3         10  
547              
548             # no bins, so equal!
549 4 50       9 if( $NB1 == -1 ){ return 1 }
  0         0  
550              
551 4         6 my $b1 = $self->{'histo'}->{'bins'}->{'b'};
552 4         5 my $b2 = $other->{'histo'}->{'bins'}->{'b'};
553 4         9 for(my $i=$NB1+1;$i-->0;){
554 34 50       59 if( $b1->[$i] != $b2->[$i] ){ return 0 }
  0         0  
555             }
556 4         13 return 1 # equal histogram STRUCTURES (not bincounts)
557             }
558             1;
559             __END__