| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Statistics::Descriptive::Weighted; | 
| 2 |  |  |  |  |  |  | $VERSION = '0.7'; | 
| 3 | 1 |  |  | 1 |  | 42703 | use Statistics::Descriptive; | 
|  | 1 |  |  |  |  | 22300 |  | 
|  | 1 |  |  |  |  | 42 |  | 
| 4 | 1 |  |  | 1 |  | 1327 | use Data::Dumper; | 
|  | 1 |  |  |  |  | 21359 |  | 
|  | 1 |  |  |  |  | 513 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | package Statistics::Descriptive::Weighted::Sparse; | 
| 7 | 1 |  |  | 1 |  | 10 | use strict; | 
|  | 1 |  |  |  |  | 10 |  | 
|  | 1 |  |  |  |  | 47 |  | 
| 8 | 1 |  |  | 1 |  | 5 | use vars qw($AUTOLOAD @ISA %fields); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 85 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | @ISA = qw(Statistics::Descriptive::Sparse); | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 1 |  |  | 1 |  | 5 | use Carp qw(cluck confess); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 987 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | ##Define a new field to be used as method, to | 
| 15 |  |  |  |  |  |  | ##augment the ones inherited | 
| 16 |  |  |  |  |  |  | %fields = ( | 
| 17 |  |  |  |  |  |  | mean                      => 0, | 
| 18 |  |  |  |  |  |  | weight                    => 0, | 
| 19 |  |  |  |  |  |  | sum_squares               => 0, | 
| 20 |  |  |  |  |  |  | weight_homozyg            => 0, | 
| 21 |  |  |  |  |  |  | biased_variance           => 0, | 
| 22 |  |  |  |  |  |  | biased_standard_deviation => 0 | 
| 23 |  |  |  |  |  |  | ); | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | __PACKAGE__->_make_accessors( [ grep { $_ ne "weight" } keys(%fields) ] ); | 
| 26 |  |  |  |  |  |  | __PACKAGE__->_make_private_accessors(["weight"]); | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | ##Have to override the base method to add new fields to the object | 
| 29 |  |  |  |  |  |  | ##The proxy method from base class is still valid | 
| 30 |  |  |  |  |  |  | sub new { | 
| 31 | 15 |  |  | 15 |  | 3322 | my $proto = shift; | 
| 32 | 15 |  | 33 |  |  | 87 | my $class = ref($proto) || $proto; | 
| 33 | 15 |  |  |  |  | 87 | my $self = $class->SUPER::new();  ##Create my self re SUPER | 
| 34 | 15 |  |  |  |  | 500 | @{ $self->{'_permitted'} } {keys %fields} = values %fields; | 
|  | 15 |  |  |  |  | 65 |  | 
| 35 | 15 |  |  |  |  | 52 | @{ $self } {keys %fields} = values %fields; | 
|  | 15 |  |  |  |  | 43 |  | 
| 36 | 15 |  |  |  |  | 47 | bless ($self, $class);  #Re-anneal the object | 
| 37 | 15 |  |  |  |  | 55 | return $self; | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | sub add_data { | 
| 41 | 22 |  |  | 22 |  | 43 | my $self = shift;  ##Myself | 
| 42 | 22 |  |  |  |  | 35 | my $oldmean; | 
| 43 |  |  |  |  |  |  | my $oldweight; | 
| 44 | 0 |  |  |  |  | 0 | my ($min,$max); | 
| 45 | 0 |  |  |  |  | 0 | my $aref; | 
| 46 |  |  |  |  |  |  |  | 
| 47 | 22 | 100 | 66 |  |  | 182 | if ( (not ref $_[0] eq 'ARRAY') || (exists $_[1] and (not (ref $_[1] eq 'ARRAY') || @{$_[0]} != @{$_[1]} ) ) ) { | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
| 48 | 4 |  |  |  |  | 599 | cluck "WARNING: Expected input are two references to two arrays of equal length; first data, then positive weights. Second array is optional.\n"; | 
| 49 | 4 |  |  |  |  | 468 | return undef; | 
| 50 |  |  |  |  |  |  | } | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 18 |  |  |  |  | 36 | my ($datum,$weight) = @_; | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | ##Calculate new mean, pseudo-variance, min and max; | 
| 55 |  |  |  |  |  |  | ##The on-line weighted incremental algorithm for variance is based on West 1979 from Wikipedia | 
| 56 |  |  |  |  |  |  | ##D. H. D. West (1979). Communications of the ACM, 22, 9, 532-535: Updating Mean and Variance Estimates: An Improved Method | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | ## NEW in Version 0.4: | 
| 59 |  |  |  |  |  |  | ## I calculate a sample weighted variance based on normalized weights rather than the sample size | 
| 60 |  |  |  |  |  |  | ## correction factor is: 1 / (1 - sum [w_i / (sum w_i) ]^2) | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | ## call H = sum [w_i / (sum w_i) ]^2. An online update eq for H is  H_new =  (sum.w_old^2 * H_old) + weight^2) / sum.w^2 | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | ## correction factor is then 1 / (1 - H_new) | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 18 |  |  |  |  | 22 | my $weighterror; | 
| 67 | 18 |  |  |  |  | 55 | for (0..$#$datum ) { | 
| 68 | 247 | 100 |  |  |  | 539 | if (not defined $$weight[$_]) { | 
| 69 | 4 |  |  |  |  | 8 | $$weight[$_] = 1; | 
| 70 |  |  |  |  |  |  | } | 
| 71 | 247 | 100 |  |  |  | 530 | if ($$weight[$_] <= 0) { | 
| 72 | 1 |  |  |  |  | 10 | $weighterror = 1; | 
| 73 | 1 |  |  |  |  | 3 | next; | 
| 74 |  |  |  |  |  |  | } | 
| 75 | 246 |  |  |  |  | 355 | $oldmean = $self->{mean}; | 
| 76 | 246 |  |  |  |  | 285 | $oldweight = $self->{weight}; | 
| 77 | 246 |  |  |  |  | 343 | $self->{weight} += $$weight[$_]; | 
| 78 | 246 |  |  |  |  | 653 | $self->{weight_homozyg} = ((($oldweight ** 2 * $self->{weight_homozyg}) + $$weight[$_] ** 2) / ( $self->{weight} ** 2 )); | 
| 79 | 246 |  |  |  |  | 295 | $self->{count}++; | 
| 80 | 246 |  |  |  |  | 400 | $self->{sum} += ($$weight[$_] * $$datum[$_]); | 
| 81 | 246 |  |  |  |  | 628 | $self->{mean} += (($$weight[$_] / $self->{weight} ) * ($$datum[$_] - $oldmean)); | 
| 82 | 246 |  |  |  |  | 600 | $self->{sum_squares} += (($$weight[$_] / $self->{weight} ) * ($$datum[$_] - $oldmean) ** 2) * $oldweight; | 
| 83 | 246 | 100 | 100 |  |  | 1108 | if (not defined $self->{max} or $$datum[$_] > $self->{max}) { | 
| 84 | 243 |  |  |  |  | 368 | $self->{max} = $$datum[$_]; | 
| 85 |  |  |  |  |  |  | } | 
| 86 | 246 | 100 | 100 |  |  | 1042 | if (not defined $self->{min} or $$datum[$_] < $self->{min}) { | 
| 87 | 15 |  |  |  |  | 32 | $self->{min} = $$datum[$_]; | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  | } | 
| 90 | 18 | 100 |  |  |  | 198 | cluck "WARNING: One or more data with nonpositive weights were skipped.\n" if ($weighterror); | 
| 91 | 18 |  |  |  |  | 179 | $self->{sample_range} = $self->{max} - $self->{min}; | 
| 92 |  |  |  |  |  |  |  | 
| 93 | 18 | 100 |  |  |  | 48 | if ($self->{count} > 1) { | 
| 94 | 14 |  |  |  |  | 41 | $self->{variance}     = ($self->{sum_squares} / ((1 - $self->{weight_homozyg}) * $self->{weight})); | 
| 95 | 14 |  |  |  |  | 43 | $self->{standard_deviation}  = sqrt( $self->{variance}); | 
| 96 | 14 |  |  |  |  | 30 | $self->{biased_variance}     = ($self->{sum_squares} / $self->{weight}); | 
| 97 | 14 |  |  |  |  | 28 | $self->{biased_standard_deviation}  = sqrt( $self->{biased_variance}); | 
| 98 |  |  |  |  |  |  | } | 
| 99 | 18 |  |  |  |  | 67 | return 1; | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | sub weight { | 
| 103 | 12 |  |  | 12 |  | 8374 | my $self = shift; | 
| 104 | 12 | 100 |  |  |  | 37 | if (@_ > 0) { | 
| 105 | 2 |  |  |  |  | 399 | cluck "WARNING: Sparse statistics object expects zero arguments to weight function, returns sum of weights."; | 
| 106 |  |  |  |  |  |  | } | 
| 107 | 12 |  |  |  |  | 228 | return $self->_weight(); | 
| 108 |  |  |  |  |  |  | } | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | ## OVERRIDES FOR UNSUPPORTED FUNCTIONS | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | sub mindex{ | 
| 113 | 0 |  |  | 0 |  | 0 | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | sub maxdex{ | 
| 117 | 0 |  |  | 0 |  | 0 | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | 1; | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | package Statistics::Descriptive::Weighted::Full; | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 1 |  |  | 1 |  | 6 | use Carp qw(cluck confess); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 62 |  | 
| 125 | 1 |  |  | 1 |  | 1218 | use Tree::Treap; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 36 |  | 
| 126 | 1 |  |  | 1 |  | 10 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 45 |  | 
| 127 | 1 |  |  | 1 |  | 5 | use vars qw(@ISA %fields); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 3590 |  | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | @ISA = qw(Statistics::Descriptive::Weighted::Sparse); | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | ##Create a list of fields not to remove when data is updated | 
| 132 |  |  |  |  |  |  | %fields = ( | 
| 133 |  |  |  |  |  |  | _permitted => undef,  ##Place holder for the inherited key hash | 
| 134 |  |  |  |  |  |  | data       => undef,  ##keys from variate values to a hashref with keys weight, cdf, tail-prob | 
| 135 |  |  |  |  |  |  | did_cdf    => undef,  ##flag to indicate whether CDF/quantile fun has been computed or not | 
| 136 |  |  |  |  |  |  | quantile   => undef,  ##"hash" for quantile function | 
| 137 |  |  |  |  |  |  | percentile => undef,  ##"hash" for percentile function | 
| 138 |  |  |  |  |  |  | maxweight  => 0, | 
| 139 |  |  |  |  |  |  | mode       => undef, | 
| 140 |  |  |  |  |  |  | order      => 1, | 
| 141 |  |  |  |  |  |  | _reserved  => undef,  ##Place holder for this lookup hash | 
| 142 |  |  |  |  |  |  | ); | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | ##Have to override the base method to add the data to the object | 
| 145 |  |  |  |  |  |  | ##The proxy method from above is still valid | 
| 146 |  |  |  |  |  |  | sub new { | 
| 147 | 8 |  |  | 8 |  | 3455 | my $proto = shift; | 
| 148 | 8 |  | 33 |  |  | 43 | my $class = ref($proto) || $proto; | 
| 149 | 8 |  |  |  |  | 39 | my $self = $class->SUPER::new();  ##Create my self re SUPER | 
| 150 | 8 |  |  |  |  | 41 | $self->{data}           = new Tree::Treap("num");  ## inserts data by numeric comparison | 
| 151 | 8 |  |  |  |  | 16 | $self->{did_cdf}        = 0; | 
| 152 | 8 |  |  |  |  | 25 | $self->{maxweight}      = 0; | 
| 153 | 8 |  |  |  |  | 27 | $self->{quantile}       = new Tree::Treap("num"); | 
| 154 | 8 |  |  |  |  | 31 | $self->{percentile}     = new Tree::Treap("num"); | 
| 155 | 8 |  |  |  |  | 18 | $self->{order}          = 1; | 
| 156 | 8 |  |  |  |  | 17 | $self->{'_reserved'} = \%fields; | 
| 157 | 8 |  |  |  |  | 21 | bless ($self, $class); | 
| 158 | 8 |  |  |  |  | 41 | return $self; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | ## The treap gives relatively fast search and good performance on possibly sorted data | 
| 162 |  |  |  |  |  |  | ## The choice is motivated by heavy intended use for Empirical Distribution Function | 
| 163 |  |  |  |  |  |  | ## A lot of work is done at insertion for faster computation on search | 
| 164 |  |  |  |  |  |  | ## THE ACTUAL DATA INSERTION IS DONE AT FUNCTION _addweight | 
| 165 |  |  |  |  |  |  | ## The data structure loses information. Like a hash keys appear only once. | 
| 166 |  |  |  |  |  |  | ## The value of a key is its sum of weight for that key, and the cumulative weight | 
| 167 |  |  |  |  |  |  | sub add_data { | 
| 168 | 9 |  |  | 9 |  | 20 | my $self = shift; | 
| 169 | 9 |  |  |  |  | 11 | my $key; | 
| 170 |  |  |  |  |  |  |  | 
| 171 | 9 | 50 | 33 |  |  | 95 | if ( (not ref $_[0] eq 'ARRAY') || (exists $_[1] and (not (ref $_[1] eq 'ARRAY') || @{$_[0]} != @{$_[1]} ) ) ) { | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 172 | 0 |  |  |  |  | 0 | cluck "WARNING: Expected input are two references to two arrays of equal length; first data, then positive weights. Second array is optional.\n"; | 
| 173 | 0 |  |  |  |  | 0 | return undef; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 | 9 |  |  |  |  | 152 | my ($datum,$weight) = @_; | 
| 177 | 9 |  |  |  |  | 14 | my $filterdatum = []; | 
| 178 | 9 |  |  |  |  | 26 | my $filterweight = []; | 
| 179 | 9 |  |  |  |  | 12 | my $weighterror; | 
| 180 |  |  |  |  |  |  | my $newweight; | 
| 181 | 9 |  |  |  |  | 30 | for (0..$#$datum) { | 
| 182 | 222 | 100 |  |  |  | 517 | if (not defined $$weight[$_]) { | 
| 183 | 5 |  |  |  |  | 14 | $$weight[$_] = 1; | 
| 184 |  |  |  |  |  |  | } | 
| 185 | 222 | 100 |  |  |  | 619 | if ($$weight[$_] > 0) { | 
| 186 | 221 |  |  |  |  | 422 | push @$filterdatum,$$datum[$_]; | 
| 187 | 221 |  |  |  |  | 1240 | push @$filterweight,$$weight[$_]; | 
| 188 | 221 |  |  |  |  | 558 | $newweight = $self->_addweight($$datum[$_], $$weight[$_]); | 
| 189 | 221 | 100 |  |  |  | 614 | if ($newweight > $self->{maxweight}) { | 
| 190 | 9 |  |  |  |  | 14 | $self->{maxweight} = $newweight; | 
| 191 | 9 |  |  |  |  | 61 | $self->{mode} = $$datum[$_]; | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  | } | 
| 194 |  |  |  |  |  |  | else { | 
| 195 | 1 |  |  |  |  | 4 | $weighterror = 1; | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  | } | 
| 198 | 9 | 100 |  |  |  | 264 | cluck "WARNING: One or more data with nonpositive weights were skipped.\n" if ($weighterror); | 
| 199 | 9 |  |  |  |  | 265 | $self->SUPER::add_data($filterdatum,$filterweight);  ##Perform base statistics on the data | 
| 200 |  |  |  |  |  |  | ##Clear the did_cdf flag | 
| 201 | 9 |  |  |  |  | 15 | $self->{did_cdf} = 0; | 
| 202 |  |  |  |  |  |  | ##Need to delete all cached keys | 
| 203 | 9 |  |  |  |  | 16 | foreach $key (keys %{ $self }) { # Check each key in the object | 
|  | 9 |  |  |  |  | 78 |  | 
| 204 |  |  |  |  |  |  | # If it's a reserved key for this class, keep it | 
| 205 | 223 | 100 |  |  |  | 460 | next if exists $self->{'_reserved'}->{$key}; | 
| 206 |  |  |  |  |  |  | # If it comes from the base class, keep it | 
| 207 | 143 | 100 |  |  |  | 342 | next if exists $self->{'_permitted'}->{$key}; | 
| 208 | 8 |  |  |  |  | 21 | delete $self->{$key};          # Delete the out of date cached key | 
| 209 |  |  |  |  |  |  | } | 
| 210 | 9 |  |  |  |  | 131 | return 1; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | sub count { | 
| 214 | 232 |  |  | 232 |  | 2223 | my $self = shift; | 
| 215 | 232 | 100 |  |  |  | 448 | if (@_ == 1) {  ##Inquire | 
|  |  | 50 |  |  |  |  |  | 
| 216 | 221 |  |  |  |  | 699 | my $val = $self->{data}->get_val($_[0]); | 
| 217 | 221 | 100 |  |  |  | 1099 | return (defined $val ? ${ $val }{'count'} : $val); | 
|  | 1 |  |  |  |  | 5 |  | 
| 218 |  |  |  |  |  |  | } | 
| 219 |  |  |  |  |  |  | elsif (@_ == 0) {  ##Inquire | 
| 220 | 11 |  |  |  |  | 39 | return $self->{count}; | 
| 221 |  |  |  |  |  |  | } | 
| 222 |  |  |  |  |  |  | else { | 
| 223 | 0 |  |  |  |  | 0 | cluck "WARNING: Only 1 or fewer arguments expected."; | 
| 224 |  |  |  |  |  |  | } | 
| 225 | 0 |  |  |  |  | 0 | return 1; | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  |  | 
| 228 |  |  |  |  |  |  | sub weight { | 
| 229 | 442 |  |  | 442 |  | 1704 | my $self = shift; | 
| 230 | 442 | 100 |  |  |  | 901 | if (@_ == 1) {  ##Inquire | 
|  |  | 50 |  |  |  |  |  | 
| 231 | 440 |  |  |  |  | 1438 | my $val = $self->{data}->get_val($_[0]); | 
| 232 | 440 | 100 |  |  |  | 1462 | return (defined $val ? ${ $val }{'weight'} : $val); | 
|  | 219 |  |  |  |  | 599 |  | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  | elsif (@_ == 0) {  ##Inquire | 
| 235 | 2 |  |  |  |  | 11 | return $self->{weight}; | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  | else { | 
| 238 | 0 |  |  |  |  | 0 | cluck "WARNING: Only 1 or fewer arguments expected."; | 
| 239 |  |  |  |  |  |  | } | 
| 240 | 0 |  |  |  |  | 0 | return 1; | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | sub _addweight { | 
| 244 | 221 |  |  | 221 |  | 251 | my $self = shift; | 
| 245 | 221 |  | 100 |  |  | 513 | my $oldweight = ($self->weight($_[0]) || 0); | 
| 246 | 221 |  |  |  |  | 310 | my $newweight = $_[1] + $oldweight; | 
| 247 | 221 |  |  |  |  | 761 | my $value = $self->{data}->get_val($_[0]); | 
| 248 | 221 | 100 |  |  |  | 541 | my $weights = ($value ? $$value{'weights'} : [] ); | 
| 249 | 221 |  |  |  |  | 491 | push @$weights, $_[1]; | 
| 250 | 221 | 100 |  |  |  | 469 | my $orders  = ($value ? $$value{'order'} : [] ); | 
| 251 | 221 |  |  |  |  | 461 | push @$orders, $self->{order}++; | 
| 252 | 221 |  | 100 |  |  | 473 | my $newcount  = ($self->count($_[0]) || 0) + 1; | 
| 253 | 221 | 50 |  |  |  | 411 | if (@_ == 2) {  ##Assign | 
| 254 | 221 |  |  |  |  | 1196 | my $values = {'weight' => $newweight, 'weights' => $weights, 'count' => $newcount, 'order' => $orders, 'cdf' => undef, 'rt_tail_prob' => undef, 'percentile' => undef}; | 
| 255 | 221 |  |  |  |  | 812 | $self->{data}->insert($_[0],$values); | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  | else { | 
| 258 | 0 |  |  |  |  | 0 | cluck "WARNING: Only two arguments (key, addend) expected."; | 
| 259 |  |  |  |  |  |  | } | 
| 260 | 221 |  |  |  |  | 448 | return $newweight; | 
| 261 |  |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | sub _do_cdf { | 
| 264 | 6 |  |  | 6 |  | 10 | my $self = shift; | 
| 265 | 6 |  |  |  |  | 10 | my $cumweight = 0; | 
| 266 | 6 |  |  |  |  | 29 | foreach my $key ($self->{data}->keys()){ | 
| 267 | 217 |  |  |  |  | 650 | my $value = $self->{data}->get_val($key); | 
| 268 | 217 |  |  |  |  | 447 | my $keyweight = $self->weight($key); | 
| 269 |  |  |  |  |  |  |  | 
| 270 | 217 |  |  |  |  | 272 | my $oldcumweight = $cumweight; | 
| 271 | 217 |  |  |  |  | 262 | $cumweight += $keyweight; | 
| 272 |  |  |  |  |  |  |  | 
| 273 | 217 |  |  |  |  | 375 | my $propcumweight = $cumweight / $self->{weight}; | 
| 274 | 217 |  |  |  |  | 359 | my $right_tail_prob = (1 - ($oldcumweight /  $self->{weight})); | 
| 275 | 217 |  |  |  |  | 350 | my $percentile = ((100 / $self->{weight}) * ($cumweight - ($keyweight / 2))); | 
| 276 |  |  |  |  |  |  |  | 
| 277 | 217 |  |  |  |  | 342 | $$value{'cdf'} = $propcumweight; | 
| 278 | 217 |  |  |  |  | 303 | $$value{'rt_tail_prob'} = $right_tail_prob; | 
| 279 | 217 |  |  |  |  | 4108 | $$value{'percentile'} = $percentile; | 
| 280 |  |  |  |  |  |  |  | 
| 281 | 217 |  |  |  |  | 825 | $self->{data}->insert($key,$value); | 
| 282 | 217 |  |  |  |  | 667 | $self->{quantile}->insert($propcumweight,$key); | 
| 283 | 217 |  |  |  |  | 633 | $self->{percentile}->insert($percentile,$key); | 
| 284 |  |  |  |  |  |  | } | 
| 285 | 6 |  |  |  |  | 120 | $self->{did_cdf} = 1; | 
| 286 | 6 |  |  |  |  | 13 | return 1; | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | sub quantile { | 
| 290 | 11 |  |  | 11 |  | 295 | my $self = shift; | 
| 291 | 11 | 100 |  |  |  | 36 | $self->_do_cdf() unless $self->{did_cdf}; | 
| 292 | 11 | 50 |  |  |  | 27 | if (@_ == 1) {  ##Inquire | 
| 293 | 11 |  |  |  |  | 13 | my $proportion = shift; | 
| 294 | 11 | 50 | 33 |  |  | 55 | cluck "WARNING: Expects an argument between 0 and 1 inclusive." if ($proportion < 0 or $proportion > 1); | 
| 295 | 11 |  |  |  |  | 40 | my @keys = $self->{quantile}->range_keys($proportion, undef); | 
| 296 | 11 |  |  |  |  | 15 | my $key = $keys[0]; ## GET THE SMALLEST QUANTILE g.e. $proportion | 
| 297 | 11 |  |  |  |  | 33 | return $self->{quantile}->get_val($keys[0]); | 
| 298 |  |  |  |  |  |  | } | 
| 299 |  |  |  |  |  |  | else { | 
| 300 | 0 |  |  |  |  | 0 | cluck "WARNING: Exactly 1 argument expected."; | 
| 301 | 0 |  |  |  |  | 0 | return undef; | 
| 302 |  |  |  |  |  |  | } | 
| 303 |  |  |  |  |  |  | } | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | sub percentile { | 
| 306 | 4 |  |  | 4 |  | 7 | my $self = shift; | 
| 307 | 4 | 100 |  |  |  | 16 | $self->_do_cdf() unless $self->{did_cdf}; | 
| 308 | 4 | 50 |  |  |  | 12 | if (@_ != 1) { | 
| 309 | 0 |  |  |  |  | 0 | cluck "WARNING: Exactly 1 argument expected."; | 
| 310 |  |  |  |  |  |  | } | 
| 311 | 4 |  |  |  |  | 5 | my $percent = shift; | 
| 312 | 4 | 50 | 33 |  |  | 20 | if ($percent < 0 or $percent > 100) { | 
| 313 | 0 |  |  |  |  | 0 | cluck "WARNING: Expects an argument between 0 and 100 inclusive."; | 
| 314 |  |  |  |  |  |  | } | 
| 315 | 4 |  |  |  |  | 6 | my $percentile; | 
| 316 | 4 | 50 |  |  |  | 19 | if ($percent < $self->{percentile}->minimum()) { | 
|  |  | 50 |  |  |  |  |  | 
| 317 | 0 |  |  |  |  | 0 | $percentile = $self->{data}->minimum(); | 
| 318 |  |  |  |  |  |  | } elsif ($percent > $self->{percentile}->maximum()) { | 
| 319 | 0 |  |  |  |  | 0 | $percentile = $self->{data}->maximum(); | 
| 320 |  |  |  |  |  |  | } else { | 
| 321 | 4 |  |  |  |  | 15 | my @lekeys =  $self->{percentile}->range_keys(undef,$percent); | 
| 322 | 4 |  |  |  |  | 8 | my $lekey = $lekeys[-1]; | 
| 323 | 4 |  |  |  |  | 18 | my @gekeys =  $self->{percentile}->range_keys($percent, undef); | 
| 324 | 4 |  |  |  |  | 7 | my $gekey = $gekeys[0]; | 
| 325 | 4 |  |  |  |  | 16 | my $leval = $self->{percentile}->get_val($lekey); | 
| 326 | 4 |  |  |  |  | 6 | $percentile = $leval; | 
| 327 | 4 | 100 |  |  |  | 13 | if ($gekey != $lekey) { | 
| 328 | 3 |  |  |  |  | 11 | my $geval = $self->{percentile}->get_val($gekey); | 
| 329 | 3 |  |  |  |  | 15 | $percentile += ($percent - $lekey) / ($gekey - $lekey) * ($geval - $leval); | 
| 330 |  |  |  |  |  |  | } | 
| 331 |  |  |  |  |  |  | } | 
| 332 | 4 |  |  |  |  | 22 | return $percentile; | 
| 333 |  |  |  |  |  |  | } | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  |  | 
| 336 |  |  |  |  |  |  | sub median { | 
| 337 | 2 |  |  | 2 |  | 4 | my $self = shift; | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | ##Cached? | 
| 340 | 2 | 50 |  |  |  | 19 | return $self->{median} if defined $self->{median}; | 
| 341 | 2 |  |  |  |  | 7 | return $self->{median} = $self->percentile(50); | 
| 342 |  |  |  |  |  |  | } | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | sub mode { | 
| 345 | 1 |  |  | 1 |  | 306 | my $self = shift; | 
| 346 | 1 |  |  |  |  | 6 | return $self->{mode}; | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | sub cdf { | 
| 350 | 11 |  |  | 11 |  | 56 | my $self = shift; | 
| 351 | 11 | 50 |  |  |  | 34 | $self->_do_cdf() unless $self->{did_cdf}; | 
| 352 | 11 | 50 |  |  |  | 29 | if (@_ == 1) {  ##Inquire | 
| 353 | 11 |  |  |  |  | 16 | my $value = shift; | 
| 354 | 11 | 100 |  |  |  | 41 | return 0 if ($self->{data}->minimum() > $value); | 
| 355 | 10 |  |  |  |  | 38 | my @keys = $self->{data}->range_keys(undef, $value); | 
| 356 | 10 |  |  |  |  | 37 | my $key = $keys[-1]; ## GET THE LARGEST OBSERVED VALUE l.e. $value | 
| 357 | 10 |  |  |  |  | 17 | return ${ $self->{data}->get_val($key) }{'cdf'}; | 
|  | 10 |  |  |  |  | 45 |  | 
| 358 |  |  |  |  |  |  | } | 
| 359 |  |  |  |  |  |  | else { | 
| 360 | 0 |  |  |  |  | 0 | cluck "WARNING: Exactly 1 argument expected."; | 
| 361 | 0 |  |  |  |  | 0 | return undef; | 
| 362 |  |  |  |  |  |  | } | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | sub survival { | 
| 366 | 4 |  |  | 4 |  | 7 | my $self = shift; | 
| 367 | 4 | 50 |  |  |  | 12 | $self->_do_cdf() unless $self->{did_cdf}; | 
| 368 | 4 | 50 |  |  |  | 10 | if (@_ == 1) {  ##Inquire | 
| 369 | 4 |  |  |  |  | 6 | my $value = shift; | 
| 370 | 4 | 100 |  |  |  | 13 | return 1 if ($self->{data}->minimum() > $value); | 
| 371 | 3 |  |  |  |  | 12 | my @keys = $self->{data}->range_keys(undef, $value); | 
| 372 | 3 |  |  |  |  | 5 | my $key = $keys[-1]; ## GET THE LARGEST OBSERVED VALUE l.e. $value | 
| 373 | 3 |  |  |  |  | 4 | return 1 - (${ $self->{data}->get_val($key) }{'cdf'}); | 
|  | 3 |  |  |  |  | 9 |  | 
| 374 |  |  |  |  |  |  | } | 
| 375 |  |  |  |  |  |  | else { | 
| 376 | 0 |  |  |  |  | 0 | cluck "WARNING: Only 1 argument expected."; | 
| 377 | 0 |  |  |  |  | 0 | return undef; | 
| 378 |  |  |  |  |  |  | } | 
| 379 |  |  |  |  |  |  | } | 
| 380 |  |  |  |  |  |  |  | 
| 381 |  |  |  |  |  |  | sub rtp { | 
| 382 | 6 |  |  | 6 |  | 9 | my $self = shift; | 
| 383 | 6 | 50 |  |  |  | 18 | $self->_do_cdf() unless $self->{did_cdf}; | 
| 384 | 6 | 50 |  |  |  | 17 | if (@_ == 1) {  ##Inquire | 
| 385 | 6 |  |  |  |  | 7 | my $value = shift; | 
| 386 | 6 | 100 |  |  |  | 22 | return 0 if ($self->{data}->maximum() < $value); | 
| 387 | 3 |  |  |  |  | 13 | my @keys = $self->{data}->range_keys($value, undef); | 
| 388 | 3 |  |  |  |  | 6 | my $key = $keys[0]; ## GET THE SMALLEST OBSERVED VALUE g.e. $value | 
| 389 | 3 |  |  |  |  | 5 | return ${ $self->{data}->get_val($key) }{'rt_tail_prob'}; | 
|  | 3 |  |  |  |  | 9 |  | 
| 390 |  |  |  |  |  |  | } | 
| 391 |  |  |  |  |  |  | else { | 
| 392 | 0 |  |  |  |  | 0 | cluck "WARNING: Only 1 argument expected."; | 
| 393 | 0 |  |  |  |  | 0 | return undef; | 
| 394 |  |  |  |  |  |  | } | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | sub get_data { | 
| 398 | 1 |  |  | 1 |  | 2 | my $self = shift; | 
| 399 | 1 | 50 |  |  |  | 7 | $self->_do_cdf() unless $self->{did_cdf}; | 
| 400 | 1 |  |  |  |  | 6 | my ($uniqkeys, $sumweights, $keys, $weights, $counts, $cdfs, $rtps, $percentiles, $order) = ([],[],[],[],[],[],[],[],[]); | 
| 401 | 1 |  |  |  |  | 6 | my $key = $self->{'data'}->minimum(); | 
| 402 | 1 |  |  |  |  | 5 | while (defined $key){ | 
| 403 | 5 |  |  |  |  | 15 | my $value = $self->{data}->get_val($key); | 
| 404 | 5 |  |  |  |  | 10 | push @$uniqkeys, $key; | 
| 405 | 5 |  |  |  |  | 15 | push @$sumweights, $$value{'weight'}; | 
| 406 | 5 |  |  |  |  | 32 | foreach my $weight (@{ $$value{'weights'} } ) { | 
|  | 5 |  |  |  |  | 12 |  | 
| 407 | 6 |  |  |  |  | 11 | push @$keys, $key; | 
| 408 | 6 |  |  |  |  | 12 | push @$weights, $weight; | 
| 409 |  |  |  |  |  |  | } | 
| 410 | 5 |  |  |  |  | 8 | push @$order, @{ $$value{'order'}}; | 
|  | 5 |  |  |  |  | 10 |  | 
| 411 | 5 |  |  |  |  | 9 | push @$counts, $$value{'count'}; | 
| 412 | 5 |  |  |  |  | 8 | push @$cdfs,   $$value{'cdf'}; | 
| 413 | 5 |  |  |  |  | 6 | push @$rtps,   $$value{'rt_tail_prob'}; | 
| 414 | 5 |  |  |  |  | 22 | push @$percentiles,   $$value{'percentile'}; | 
| 415 | 5 |  |  |  |  | 16 | $key = $self->{'data'}->successor($key); | 
| 416 |  |  |  |  |  |  | } | 
| 417 | 1 |  |  |  |  | 26 | return {'uniqvars' => $uniqkeys, 'sumweights' => $sumweights, 'counts' => $counts, 'cdfs' => $cdfs, 'rtps' => $rtps, 'vars' => $keys, 'weights' => $weights, 'percentiles' => $percentiles, 'order' => $order}; | 
| 418 |  |  |  |  |  |  | } | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | sub print { | 
| 421 | 1 |  |  | 1 |  | 294 | my $self = shift; | 
| 422 | 1 |  |  |  |  | 5 | print Data::Dumper->Dump([$self->get_data()]); | 
| 423 |  |  |  |  |  |  | } | 
| 424 |  |  |  |  |  |  |  | 
| 425 |  |  |  |  |  |  | ## OVERRIDES FOR UNSUPPORTED FUNCTIONS | 
| 426 |  |  |  |  |  |  |  | 
| 427 |  |  |  |  |  |  | sub sort_data{ | 
| 428 | 0 |  |  | 0 |  |  | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 429 |  |  |  |  |  |  | } | 
| 430 |  |  |  |  |  |  |  | 
| 431 |  |  |  |  |  |  | sub presorted{ | 
| 432 | 0 |  |  | 0 |  |  | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 433 |  |  |  |  |  |  | } | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | sub harmonic_mean{ | 
| 436 | 0 |  |  | 0 |  |  | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 437 |  |  |  |  |  |  | } | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | sub geometric_mean{ | 
| 440 | 0 |  |  | 0 |  |  | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 441 |  |  |  |  |  |  | } | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | sub trimmed_mean{ | 
| 444 | 0 |  |  | 0 |  |  | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 445 |  |  |  |  |  |  | } | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | sub frequency_distribution{ | 
| 448 | 0 |  |  | 0 |  |  | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 449 |  |  |  |  |  |  | } | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | sub least_squares_fit{ | 
| 452 | 0 |  |  | 0 |  |  | confess "ERROR: Statistics::Descriptive::Weighted does not support this function."; | 
| 453 |  |  |  |  |  |  | } | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  |  | 
| 456 |  |  |  |  |  |  | 1; | 
| 457 |  |  |  |  |  |  |  | 
| 458 |  |  |  |  |  |  | package Statistics::Descriptive; | 
| 459 |  |  |  |  |  |  |  | 
| 460 |  |  |  |  |  |  | ##All modules return true. | 
| 461 |  |  |  |  |  |  | 1; | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | __END__ |