File Coverage

blib/lib/Statistics/Running.pm
Criterion Covered Total %
statement 259 309 83.8
branch 38 56 67.8
condition 10 17 58.8
subroutine 47 54 87.0
pod 26 34 76.4
total 380 470 80.8


line stmt bran cond sub pod time code
1             package Statistics::Running;
2              
3 3     3   66331 use 5.006;
  3         24  
4 3     3   15 use strict;
  3         6  
  3         72  
5 3     3   29 use warnings;
  3         7  
  3         99  
6              
7 3     3   1798 use Data::Dumper;
  3         19952  
  3         307  
8              
9             our $VERSION = '0.13';
10              
11             # overload these operators to have special meaning when
12             # operand(s) are Statistics::Running:
13             use overload
14             # add two stats object and adjust summed mean,stdev etc.
15 3         40 '+' => \&concatenate,
16             # check if two stats objects are same wrt mean,stdev,N BUT NOT histogram
17             '==' => \&equals,
18             # convert a stats object into a string, e.g. print $obj."\n";
19             '""' => \&stringify,
20 3     3   3368 ;
  3         2783  
21              
22 3     3   1712 use Try::Tiny;
  3         6037  
  3         162  
23 3     3   1322 use Statistics::Histogram;
  3         90541  
  3         422  
24              
25             # this is for all numerical equality comparisons
26 3     3   29 use constant SMALL_NUMBER_FOR_EQUALITY => 1E-10;
  3         8  
  3         11179  
27              
28             # creates an obj. There are no input params
29             sub new {
30 16     16 1 3373 my $class = $_[0];
31              
32 16   100     97 my $parent = ( caller(1) )[3] || "N/A";
33 16         100 my $whoami = ( caller(0) )[3];
34              
35 16         140 my $self = {
36             # these are internal variables to store mean etc. or used to calculate Kurtosis
37             'M1' => 0.0,
38             'M2' => 0.0,
39             'M3' => 0.0,
40             'M4' => 0.0,
41             'ABS-SUM' => 0.0,
42             'SUM' => 0.0,
43             'MIN' => 0.0,
44             'MAX' => 0.0,
45             'N' => 0, # number of data items inserted
46             # this histogram is updated each time a new data point is pushed in the object
47             # it just holds the number of items in each bin, so it is not too expensive.
48             # with this we get an idea of the Probability Distribution of the pushed data.
49             # Which may or may not be useful to users.
50             # Should you want to avoid this then use Statistics::Running::Tiny
51             'histo' => {
52             'num-bins' => -1,
53             'bins' => {
54             # b: [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]
55             'b' => [], # length is 'num-bins'+1
56             # c: contains the counts, its size is equal to the number of bins
57             # the first cell contains counts in the interval [histo-left-boundary, bin1_right_boundary]
58             # the last cell contains counts of [binN-1_right_boundary, histo-right-boundary]
59             'c' => [], # length 'num-bins'
60             },
61             # cached stringified histogram, it is re-calculated only if data points added
62             # and asked to print histogram
63             'stringified' => undef,
64             # when asked to stringify a hist we actually use a cached string
65             # which needs to be recalculated whenever data is added or hist re-created
66             'needs-recalculate' => 1,
67             },
68             };
69 16         35 bless($self, $class);
70 16         51 $self->clear();
71 16         52 return $self
72             }
73             # returns the histogram bins (can be empty) in our internal format
74 2     2 1 24 sub histogram { return $_[0]->{'histo'} }
75             # if no params, it returns our bins as a hash
76             # otherwise it imports input bins in the form of a hash
77             # and before that it erases previous histogram and forms it according to input, e.g.
78             # sets bin-widths and numbins etc.
79             sub histogram_bins_hash {
80 9     9 0 29 my $self = $_[0];
81 9         12 my $bins = $_[1];
82 9 100       30 if( ! defined($bins) ){
83             # export to a hash
84 6         16 return $self->_bins2hash()
85             }
86             # import from a hash
87 3         11 $self->_hash2bins($bins);
88             }
89             # if no params, it returns our bins as a hash of the form returned by Statistics::Descriptive::frequency_distribution()
90             # otherwise it imports input bins in the form of a hash in the form returned by Statistics::Descriptive::frequency_distribution()
91             sub histogram_bins_stathash {
92 0     0 0 0 my $self = $_[0];
93 0         0 my $bi = $_[1];
94 0 0       0 if( ! defined($bi) ){
95             # export to a hash
96 0         0 return $self->_bins2stathash()
97             }
98             # import from a hash
99 0         0 $self->_stathash2bins($bi);
100             }
101             # return a string showing this histogram by calling Statistics::Histogram::print_histogram()
102             # we first convert our hist to stathash format
103             sub histogram_stringify {
104 3     3 0 9 my ($self, @opts) = @_;
105 3 50       11 if( $self->{'histo'}->{'needs-recalculate'} == 1 ){ $self->_histogram_recalculate(@opts) }
  3         9  
106 3         63 return $self->{'histo'}->{'stringified'}
107             }
108             # we need to recalculate each time a new data is added.
109             # but we do recalculate whenever it is needed, i.e. when we asked to print histogram
110             sub _histogram_recalculate {
111 3     3   7 my ($self, @stringify_opts) = @_;
112 3         7 my $histstr = "";
113 3 100       11 if( $self->{'histo'}->{'num-bins'} > 0 ){
114             Try::Tiny::try {
115             $histstr = Statistics::Histogram::print_histogram(
116             'hist' => $self->_bins2stathash(),
117 2     2   118 'x_min' => $self->{'histo'}->{'bins'}->{'b'}->[0],
118             use_linear_axes => 1,
119             @stringify_opts
120             )
121             } Try::Tiny::catch {
122 0     0   0 print STDERR "_histogram_recalculate() : error caught trying to stringify: $_\n";
123 0         0 $histstr = "";
124 2         26 };
125             }
126 3         581 $self->{'histo'}->{'stringified'} = $histstr;
127 3         23 $self->{'histo'}->{'needs-recalculate'} = 0;
128             }
129             # disable histogram logging, all existing histogram data is erased
130             sub histogram_disable {
131 0     0 1 0 my $self = $_[0];
132              
133 0         0 $self->{'histo'}->{'num-bins'} = -1;
134 0         0 $self->{'histo'}->{'bins'}->{'b'} = [];
135 0         0 $self->{'histo'}->{'bins'}->{'c'} = [];
136 0         0 $self->{'histo'}->{'needs-recalculate'} = 1;
137             }
138             # returns the count in bin specified as 1st input param
139 0     0 1 0 sub histogram_count { return $_[0]->{'histo'}->{'c'}->[$_[1]] }
140              
141             # enables histogram logging
142             # it expects some parameters for creating the histogram in various forms,
143             # e.g. by specifying the number of bins, bin-width and left boundary or
144             # by specifying a HASH or ARRAY of bin specifications for non-uniform bin
145             # sizes. HASH must be of the form 'FROM:TO'->counts
146             # ARRAY of bin boundaries of the form
147             # [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]
148             # the number of bins is 1 less than the length of this array
149             sub histogram_enable {
150 7     7 1 45 my $self = $_[0];
151 7         8 my $params = $_[1]; # $_[1] // {} does not work for perl<5.10, ? : requests $_[1] twice, so Cish if( ! defined below...
152              
153 7 50       16 if( ! defined($params) ){ $params = {} }
  0         0  
154              
155 7         14 my ($m1, $m2, $m3);
156 7 50 33     47 if( defined($m1=$params->{'bins'}) ){
    50 33        
157 0         0 my $aref = ref($m1);
158 0 0       0 if( $aref eq 'ARRAY' ){
    0          
159             # an array of bin boundaries of the form
160             # [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]
161             # the number of bins is 1 less than the length of this array
162 0         0 my @mm = @$m1;
163 0         0 $self->{'histo'}->{'num-bins'} = scalar(@mm)-1;
164 0         0 $self->{'histo'}->{'bins'}->{'b'} = [@mm];
165 0         0 $self->{'histo'}->{'bins'}->{'c'} = (0) x $self->{'histo'}->{'num-bins'};
166             } elsif( $aref eq 'HASH' ){
167             # a hashref keyed on bin-intervals in the form FROM:TO->counts
168 0         0 $self->_hash2bins($m1);
169 0         0 } else { die "parameter 'bins' expects either a HASHREF keyed on bin-intervals in the form FROM:TO->counts (and counts can be non-zero if that is a previous histogram), or an ARRAYREF with bin boundaries of the form [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]. In this case the number of bins is 1 less than the length of the array." }
170             } elsif( defined($m1=$params->{'bin-width'})
171             && defined($m2=$params->{'num-bins'})
172             && defined($m3=$params->{'left-boundary'})
173             ){
174             # we re-create our own bins based on num-bins etc.
175 7         18 $self->_histogram_create_bins_from_spec($m1, $m2, $m3)
176             } else {
177             # no params! we need params
178 0         0 print STDERR "enable_histogram() : failed to enable histogram because no histogram specification was supplied. Try enable_histogram({bin-width=>1, nun-bins=>10, left-boundary=>-5});\n";
179             }
180 7         16 $self->{'histo'}->{'needs-recalculate'} = 1;
181             }
182             # set existing histogram to zero counts
183             sub histogram_reset {
184 18     18 1 22 my $self = $_[0];
185              
186             # no params, set all counts OF ALREADY EXISTING histogram to zero
187 18         34 my $m1 = $self->{'histo'}->{'bins'}->{'c'};
188 18         53 for(my $i=$self->{'histo'}->{'num-bins'};$i-->0;){ $m1->[$i] = 0 }
  0         0  
189 18         39 $self->{'histo'}->{'needs-recalculate'} = 1;
190             }
191             # push Data: a sample and process/update mean and all other stat measures
192             # also insert it in histogram
193             sub add {
194 756     756 1 2084 my $self = $_[0];
195 756         987 my $x = $_[1];
196              
197 756         1006 my $aref = ref($x);
198              
199 756 100       1289 if( $aref eq '' ){
    50          
200             # a scalar input
201 753         1068 my ($delta, $delta_n, $delta_n2, $term1);
202 753         1082 my $n1 = $self->{'N'};
203 753 100       1172 if( $n1 == 0 ){ $self->{'MIN'} = $self->{'MAX'} = $x }
  10         21  
204             else {
205 743 100       1289 if( $x < $self->{'MIN'} ){ $self->{'MIN'} = $x }
  17         33  
206 743 100       1308 if( $x > $self->{'MAX'} ){ $self->{'MAX'} = $x }
  149         217  
207             }
208 753         1031 $self->{'SUM'} += $x; # add x to the total SUM
209 753         1087 $self->{'ABS-SUM'} += abs($x); # add abs(x) to the total SUM
210 753         1010 $self->{'N'} += 1; # increment sample size push in
211 753         1034 my $n0 = $self->{'N'};
212              
213 753         1016 $delta = $x - $self->{'M1'};
214 753         1018 $delta_n = $delta / $n0;
215 753         987 $delta_n2 = $delta_n * $delta_n;
216 753         1035 $term1 = $delta * $delta_n * $n1;
217 753         1006 $self->{'M1'} += $delta_n;
218             $self->{'M4'} += $term1 * $delta_n2 * ($n0*$n0 - 3*$n0 + 3)
219             + 6 * $delta_n2 * $self->{'M2'}
220 753         1604 - 4 * $delta_n * $self->{'M3'}
221             ;
222             $self->{'M3'} += $term1 * $delta_n * ($n0 - 2)
223 753         1274 - 3 * $delta_n * $self->{'M2'}
224             ;
225 753         979 $self->{'M2'} += $term1;
226             # add data point to the internal histogram
227 753         1276 $self->_histogram_add($x);
228             } elsif( $aref eq 'ARRAY' ){
229             # an array input
230 3         6 foreach (@$x){ $self->add($_) }
  302         454  
231             } else {
232 0         0 die "add(): only ARRAY and SCALAR can be handled (input was type '$aref')."
233             }
234             }
235             # copies input(=src) Running obj into current/self overwriting our data, this is not a clone()!
236             sub copy_from {
237 1     1 1 4 my $self = $_[0];
238 1         2 my $src = $_[1];
239 1         3 $self->{'M1'} = $src->M1();
240 1         2 $self->{'M2'} = $src->M2();
241 1         3 $self->{'M3'} = $src->M3();
242 1         3 $self->{'M4'} = $src->M4();
243 1         6 $self->set_N($src->get_N());
244 1         4 $self->_histogram_copy_from($src);
245             }
246             # clones current obj into a new Running obj with same values
247             sub clone {
248 1     1 1 2 my $self = $_[0];
249 1         4 my $newO = Statistics::Running->new();
250 1         4 $newO->{'M1'} = $self->M1();
251 1         3 $newO->{'M2'} = $self->M2();
252 1         3 $newO->{'M3'} = $self->M3();
253 1         3 $newO->{'M4'} = $self->M4();
254 1         4 $newO->set_N($self->get_N());
255 1         2 return $newO
256             }
257             # clears all data entered/calculated including histogram
258             sub clear {
259 18     18 1 30 my $self = $_[0];
260 18         91 $self->{'M1'} = 0.0;
261 18         29 $self->{'M2'} = 0.0;
262 18         24 $self->{'M3'} = 0.0;
263 18         33 $self->{'M4'} = 0.0;
264 18         49 $self->{'MIN'} = 0.0;
265 18         29 $self->{'MAX'} = 0.0;
266 18         26 $self->{'ABS-SUM'} = 0.0;
267 18         27 $self->{'SUM'} = 0.0;
268 18         26 $self->{'N'} = 0;
269 18         38 $self->histogram_reset();
270             }
271             # return the mean of the data entered so far
272 4     4 1 31 sub mean { return $_[0]->{'M1'} }
273 3     3 1 10 sub sum { return $_[0]->{'SUM'} }
274 0     0 1 0 sub abs_sum { return $_[0]->{'ABS-SUM'} }
275 4     4 1 21 sub min { return $_[0]->{'MIN'} }
276 4     4 1 15 sub max { return $_[0]->{'MAX'} }
277             # get number of total elements entered so far
278 22     22 1 91 sub get_N { return $_[0]->{'N'} }
279             sub variance {
280 4     4 1 9 my $self = $_[0];
281 4         9 my $m = $self->{'N'};
282 4 50       14 if( $m == 1 ){ return 0 }
  0         0  
283 4         45 return $self->{'M2'}/($m-1.0)
284             }
285 4     4 1 21 sub standard_deviation { return sqrt($_[0]->variance()) }
286             sub skewness {
287 3     3 1 6 my $self = $_[0];
288 3         7 my $m = $self->{'M2'};
289 3 100       8 if( $m == 0 ){ return 0 }
  1         4  
290             return sqrt($self->{'N'})
291 2         18 * $self->{'M3'} / ($m ** 1.5)
292             ;
293             }
294             sub kurtosis {
295 4     4 1 8 my $self = $_[0];
296 4         8 my $m = $self->{'M2'};
297 4 100       14 if( $m == 0 ){ return 0 }
  2         10  
298             return $self->{'N'}
299 2         14 * $self->{'M4'}
300             / ($m * $m)
301             - 3.0
302             ;
303             }
304             # concatenates another Running obj with current
305             # AND returns a new Running obj with concatenated stats
306             # Current object is not modified.
307             sub concatenate {
308 4     4 1 25 my $self = $_[0]; # us
309 4         8 my $other = $_[1]; # another Running obj
310              
311 4         10 my $combined = Statistics::Running->new();
312              
313 4         11 my $selfN = $self->get_N();
314 4         24 my $otherN = $other->get_N();
315 4         11 my $selfM2 = $self->M2();
316 4         9 my $otherM2 = $other->M2();
317 4         11 my $selfM3 = $self->M3();
318 4         9 my $otherM3 = $other->M3();
319              
320 4         9 my $combN = $selfN + $otherN;
321 4         12 $combined->set_N($combN);
322            
323 4         10 my $delta = $other->M1() - $self->M1();
324 4         8 my $delta2 = $delta*$delta;
325 4         9 my $delta3 = $delta*$delta2;
326 4         6 my $delta4 = $delta2*$delta2;
327              
328 4         10 $combined->{'M1'} = ($selfN*$self->M1() + $otherN*$other->M1()) / $combN;
329              
330 4         11 $combined->{'M2'} = $selfM2 + $otherM2 +
331             $delta2 * $selfN * $otherN / $combN;
332            
333 4         18 $combined->{'M3'} = $selfM3 + $otherM3 +
334             $delta3 * $selfN * $otherN * ($selfN - $otherN)/($combN*$combN) +
335             3.0*$delta * ($selfN*$otherM2 - $otherN*$selfM2) / $combN
336             ;
337            
338 4         30 $combined->{'M4'} = $self->{'M4'} + $other->{'M4'}
339             + $delta4*$selfN*$otherN * ($selfN*$selfN - $selfN*$otherN + $otherN*$otherN) /
340             ($combN*$combN*$combN)
341             + 6.0*$delta2 * ($selfN*$selfN*$otherM2 + $otherN*$otherN*$selfM2)/($combN*$combN) +
342             4.0*$delta*($selfN*$otherM3 - $otherN*$selfM3) / $combN
343             ;
344              
345 4         11 $combined->{'SUM'} = $self->{'SUM'} + $other->{'SUM'};
346 4         16 $combined->{'ABS-SUM'} = $self->{'ABS-SUM'} + $other->{'ABS-SUM'};
347 4 100       20 $combined->{'MIN'} = $self->{'MIN'} < $other->{'MIN'} ? $self->{'MIN'} : $other->{'MIN'};
348 4 50       13 $combined->{'MAX'} = $self->{'MAX'} > $other->{'MAX'} ? $self->{'MAX'} : $other->{'MAX'};
349              
350             # add the histograms only if structure matches:
351 4 100       12 if( $self->_equals_histograms_structure($other) ){
352 1         4 $combined->_histogram_copy_from($self);
353 1         5 $combined->_add_histograms($other);
354             }
355              
356 4         19 return $combined;
357             }
358             # appends another Running obj INTO current
359             # histogram data is appended only if histogram specs are the same
360             # current obj (self) IS MODIFIED
361             sub append {
362 0     0 1 0 my $self = $_[0]; # us
363 0         0 my $other = $_[1]; # another Running obj
364 0         0 $self->copy_from($self+$other);
365             }
366             # equality only wrt to stats BUT NOT histogram
367             sub equals {
368 4     4 1 17 my $self = $_[0]; # us
369 4         7 my $other = $_[1]; # another Running obj
370             return
371 4   66     8 $self->get_N() == $other->get_N() &&
372             $self->equals_statistics($other)
373             }
374             sub equals_statistics {
375 5     5 1 17 my $self = $_[0]; # us
376 5         9 my $other = $_[1]; # another Running obj
377             return
378 5   33     9 abs($self->M1()-$other->M1()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY &&
379             abs($self->M2()-$other->M2()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY &&
380             abs($self->M3()-$other->M3()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY &&
381             abs($self->M4()-$other->M4()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY
382             }
383             # checks if structure is same and then if bin contents (counts) are same
384             # returns 1 if equals
385             # returns 0 if either structure or counts are not the same
386             sub equals_histograms {
387 2     2 1 11 my $self = $_[0]; # us
388 2         4 my $other = $_[1]; # another Running obj
389              
390             # structure is not the same
391 2 50       7 if( $self->_equals_histograms_structure($other) == 0 ){ return 0 }
  0         0  
392              
393 2         5 my $selfC = $self->{'histo'}->{'bins'}->{'c'};
394 2         4 my $otherC = $other->{'histo'}->{'bins'}->{'c'};
395 2         3 my $i;
396 2         6 for($i=$self->{'histo'}->{'num-bins'};$i-->0;){
397 10 50       23 if( $selfC->[$i] != $otherC->[$i] ){ return 0 }
  0         0  
398             }
399 2         8 return 1 # equal in structure and counts
400             }
401             # adds counts of histograms to us from other
402             # returns 0 if structures do not match
403             # returns 1 if counts added OK
404             sub _add_histograms {
405 1     1   3 my $self = $_[0]; # us
406 1         2 my $other = $_[1]; # another Running obj
407              
408             # structure is not the same
409 1 50       8 if( $self->_equals_histograms_structure($other) == 0 ){ return 0 }
  0         0  
410              
411 1         3 my $selfC = $self->{'histo'}->{'bins'}->{'c'};
412 1         2 my $otherC = $other->{'histo'}->{'bins'}->{'c'};
413 1         2 my $i;
414 1         4 for($i=$self->{'histo'}->{'num-bins'};$i-->0;){
415 10         18 $selfC->[$i] += $otherC->[$i];
416             }
417 1         2 $self->{'histo'}->{'needs-recalculate'} = 1;
418 1         2 return 1 # counts added
419             }
420             # print object as a string, string concat/printing is overloaded on this method
421             sub stringify {
422 3     3 1 15 my $self = $_[0];
423 3         17 return "N: ".$self->get_N()
424             .", mean: ".$self->mean()
425             .", range: ".$self->min()." to ".$self->max()
426             .", sum: ".$self->sum()
427             .", standard deviation: ".$self->standard_deviation()
428             .", kurtosis: ".$self->kurtosis()
429             .", skewness: ".$self->skewness()
430             .", histogram:\n".$self->histogram_stringify()
431             }
432             # internal methods, no need for anyone to know or use externally
433 6     6 0 11 sub set_N { $_[0]->{'N'} = $_[1] }
434 28     28 0 72 sub M1 { return $_[0]->{'M1'} }
435 20     20 0 49 sub M2 { return $_[0]->{'M2'} }
436 20     20 0 48 sub M3 { return $_[0]->{'M3'} }
437 12     12 0 44 sub M4 { return $_[0]->{'M4'} }
438             # copy src's histogram to us, erasing previous data and histo-format
439             sub _histogram_copy_from {
440 2     2   5 my $self = $_[0];
441 2         4 my $src = $_[1]; # a src stats object whose histogram we are copying onto us
442 2         6 $self->histogram_bins_hash($src->histogram_bins_hash());
443             }
444             # given bin-width, num-bins and left-boundary create the bin arrays
445             sub _histogram_create_bins_from_spec {
446 7     7   17 my ($self, $bw, $nb, $lb) = @_;
447              
448 7         12 $self->{'histo'}->{'num-bins'} = $nb;
449 7         24 my @B = (0)x($nb+1);
450 7         11 my ($i);
451 7         8 my $v = $lb;
452 7         18 for($i=0;$i<=$nb;$i++){
453 63         82 $B[$i] = $v;
454 63         103 $v += $bw;
455             }
456 7         15 $self->{'histo'}->{'bins'}->{'b'} = \@B;
457 7         18 $self->{'histo'}->{'bins'}->{'c'} = [(0)x$nb];
458 7         15 $self->{'histo'}->{'needs-recalculate'} = 1;
459             }
460             # add a datapoint to the histogram, this is usually called only via the public add()
461             sub _histogram_add {
462 753     753   1021 my $self = $_[0];
463 753         3531 my $x = $_[1]; # value to add
464 753         1027 my ($n, $i);
465 753 100       1407 if( ($n=$self->{'histo'}->{'num-bins'}) <= 0 ){ return }
  502         958  
466 251         331 my $B = $self->{'histo'}->{'bins'}->{'b'};
467 251         459 for($i=0;$i<$n;$i++){
468 1090 100 100     3178 if( ($x > $B->[$i]) && ($x <= $B->[$i+1]) ){
469 146         205 $self->{'histo'}->{'bins'}->{'c'}->[$i]++;
470 146         210 $self->{'histo'}->{'needs-recalculate'} = 1; # need to recalc stringify
471             return
472 146         318 }
473             }
474             }
475             # given the bins and bin counts arrays, return a hash in the natural form:
476             # from-bin:to-bin -> count
477             # see also _bins2stathash for returning a hash of the format specified in Statistics::Descriptive
478             sub _bins2hash {
479 6     6   10 my $self = $_[0];
480 6         10 my %ret = ();
481 6         12 my $B = $self->{'histo'}->{'bins'}->{'b'};
482 6         9 my $C = $self->{'histo'}->{'bins'}->{'c'};
483 6         9 my $i;
484 6         17 for($i=$self->{'histo'}->{'num-bins'};$i-->0;){
485 45         127 $ret{$B->[$i].":".$B->[$i+1]} = $C->[$i]
486             }
487 6         27 return \%ret
488             }
489             # given the bins and bin counts arrays, return a hash with keys
490             # to-bin -> count
491             # whereas count is the count of the bin specified by to-bin and its previous key of the hash
492             sub _bins2stathash {
493 2     2   4 my $self = $_[0];
494 2         4 my %ret = ();
495 2         13 my $B = $self->{'histo'}->{'bins'}->{'b'};
496 2         6 my $C = $self->{'histo'}->{'bins'}->{'c'};
497 2         3 my $i;
498 2         8 for($i=$self->{'histo'}->{'num-bins'}-1;$i-->0;){
499 18         75 $ret{$B->[$i+1]} = $C->[$i]
500             }
501 2         17 return \%ret
502             }
503             # given a hash with keys
504             # from-bin:to-bin -> count
505             # erase and re-create the bin and counts arrays of histo.
506             # for a way to import Statistics::Descriptive frequency_distribution hash check _stathash2bins()
507             sub _hash2bins {
508 3     3   6 my $self = $_[0];
509 3         5 my $H = $_[1];
510 3         6 my @B = ();
511 3         5 my @C = ();
512 3         13 my @K = keys %$H;
513 3         7 $self->{'histo'}->{'num-bins'} = scalar(@K);
514 3         6 my ($acount, $akey);
515              
516             my @X = map {
517 15         25 push(@B, $_->[1]); # left-bin (from)
518 15         25 push(@C, $H->{$_->[0]}); # counts
519 15         24 $_->[2]; # spit out the right-bin (to)
520             }
521 27         44 sort { $a->[1] <=> $b->[1] }
522 3         8 map { [ $_, split(/\:/, $_) ] }
  15         59  
523             @K
524             ;
525 3         16 push(@B, $X[-1]);
526 3         12 $self->{'histo'}->{'bins'}->{'b'} = \@B;
527 3         8 $self->{'histo'}->{'bins'}->{'c'} = \@C;
528 3         13 $self->{'histo'}->{'needs-recalculate'} = 1;
529             }
530             # given a hash with keys
531             # to-bin -> count
532             # erase and re-create the bin and counts arrays of histo.
533             # the hash is exactly what Statistics::Descriptive::frequency_distribution() returns
534             # there is only one problem: what is the left-boundary? we will set it to -infinity.
535             sub _stathash2bins {
536 0     0   0 my $self = $_[0];
537 0         0 my $H = $_[1]; # hashref: exactly what Statistics::Descriptive::frequency_distribution() returns
538 0         0 my @B = ();
539 0         0 my @C = ();
540 0         0 my @K = keys %$H;
541 0         0 $self->{'histo'}->{'num-bins'} = scalar(@K);
542 0         0 my ($acount, $akey);
543              
544 0         0 push(@B, -(~0 >> 1)); # -MAX_INT fuck you.
545 0         0 foreach my $k (sort { $a <=> $b } keys %$H){
  0         0  
546 0         0 push(@B, $k);
547 0         0 push(@C, $H->{$k});
548             }
549 0         0 $self->{'histo'}->{'bins'}->{'b'} = \@B;
550 0         0 $self->{'histo'}->{'bins'}->{'c'} = \@C;
551 0         0 $self->{'histo'}->{'needs-recalculate'} = 1;
552             }
553             # compares the structure of the histograms of us and another obj
554             # if histograms have same number of bins and same bin-specs (boundaries)
555             # then histograms are equal and returns 1
556             # if both histograms contain zero bins (not initialised) then also returns 1
557             # else, histogram structure differs and returns 0
558             sub _equals_histograms_structure {
559 7     7   16 my ($self, $other) = @_;
560              
561 7         15 my $NB1 = $self->{'histo'}->{'num-bins'};
562 7 100       20 if( $NB1 != $other->{'histo'}->{'num-bins'} ){ return 0 }
  3         9  
563              
564             # no bins, so equal!
565 4 50       10 if( $NB1 == -1 ){ return 1 }
  0         0  
566              
567 4         7 my $b1 = $self->{'histo'}->{'bins'}->{'b'};
568 4         6 my $b2 = $other->{'histo'}->{'bins'}->{'b'};
569 4         12 for(my $i=$NB1+1;$i-->0;){
570 34 50       74 if( $b1->[$i] != $b2->[$i] ){ return 0 }
  0         0  
571             }
572 4         16 return 1 # equal histogram STRUCTURES (not bincounts)
573             }
574             1;
575             __END__