File Coverage

blib/lib/Statistics/Simpson.pm
Criterion Covered Total %
statement 39 42 92.8
branch 19 24 79.1
condition 2 3 66.6
subroutine 5 6 83.3
pod 2 2 100.0
total 67 77 87.0


line stmt bran cond sub pod time code
1             package Statistics::Simpson;
2              
3 3     3   1473 use strict;
  3         5  
  3         109  
4              
5 3     3   12 use vars qw($VERSION @ISA);
  3         3  
  3         256  
6              
7             $VERSION = '0.02';
8              
9 3     3   1638 use Statistics::Frequency 0.04;
  3         4250  
  3         1171  
10             @ISA = qw(Statistics::Frequency);
11              
12             my $Napier = exp(1);
13              
14             =head1 NAME
15              
16             Statistics::Simpson - Simpson index
17              
18             =head1 SYNOPSIS
19              
20             The object-oriented interface:
21              
22             use Statistics::Simpson;
23              
24             # The constructor is inherited from Statistics::Frequency.
25              
26             my $pop = Statistics::Simpson->new(@data);
27             my $pop = Statistics::Simpson->new(\@data);
28             my $pop = Statistics::Simpson->new(\%data);
29             my $pop = Statistics::Simpson->new($another);
30              
31             # The Simpson index and the Simpson evenness.
32              
33             print $pop->index, "\n";
34              
35             print $pop->evenness, "\n";
36              
37             The "anonymous" interface where the population data is not a
38             Statistics::Frequency object but instead either an array reference,
39             in which case the array elements are the frequencies, or a hash
40             reference, in which case the hash values are the frequencies.
41              
42             use Statistics::Simpson;
43              
44             print Statistics::Simpson::index([ data ]), "\n";
45              
46             print Statistics::Simpson::index({ data }), "\n";
47              
48             print Statistics::Simpson::evenness([ data ]), "\n";
49              
50             print Statistics::Simpson::evenness({ data }), "\n";
51              
52             The rest of the data manipulation interface is inherited from Statistics::Frequency:
53              
54             $pop->add_data(@more_data);
55             $pop->add_data(\@more_data);
56             $pop->add_data(\%more_data);
57             $pop->add_data($another);
58              
59             $pop->remove_data(@less_data);
60             $pop->remove_data(\@less_data);
61             $pop->remove_data(\%less_data);
62             $pop->remove_data($another);
63              
64             $pop->copy_data($another);
65              
66             $pop->clear_data();
67              
68             =head1 DESCRIPTION
69              
70             Statistics::Simpson module can be used to compute the Simpson
71             index of data, which measures the variability of data.
72              
73             The index() and evenness() interfaces are the only genuine interfaces
74             of this module; the constructor and the rest of the data manipulation
75             interface are inherited from Statistics::Frequency.
76              
77             =head2 new
78              
79             my $pop = Statistics::Simpson->new(@data);
80             my $pop = Statistics::Simpson->new(\@data);
81             my $pop = Statistics::Simpson->new(\%data);
82             my $pop = Statistics::Simpson->new($another);
83              
84             Creates a new Simpson object from the initial data.
85              
86             The data may be either a list, a reference to an array or a reference
87             to a hash.
88              
89             =over 4
90              
91             =item *
92              
93             If the data is a list (or an array), the list elements are counted
94             to find out their frequencies.
95              
96             =item *
97              
98             If the data is a reference to an array, the array elements are counted
99             to find out their frequencies.
100              
101             =item *
102              
103             If the data is a reference to a hash, the hash keys are the data
104             elements and the hash values are the data frequencies.
105              
106             =item *
107              
108             If the data is another Statistics::Simpson object, its
109             frequencies are used.
110              
111             =back
112              
113             =head2 index
114              
115             $pop->index;
116              
117             Return the Simpson index of the data. The index is defined as
118              
119             $Simpson = 1 / sum($p{$e}**2)
120              
121             where the $p{$e} is the proportional [0,1] frequency of the element $e.
122             The value of the index ranges from 1 (the population is dominated by
123             one kind) to the number of different elements (the population is
124             evenly divided).
125              
126             The Simpson index is used in biology and ecology, especially when
127             talking about populations and biodiversity.
128              
129             =head2 evenness
130              
131             Evenness measures how similar the frequencies are.
132              
133             $Evenness = $Simpson / $NumberOfDifferentElements
134              
135             When all the frequencies are equal, evenness is one. Frequency
136             imbalance lowers the evenness value.
137              
138             =head2 add_data
139              
140             $pop->add_data(@more_data);
141             $pop->add_data(\@more_data);
142             $pop->add_data(\%more_data);
143             $pop->add_data($another);
144            
145             Add more data to the object. The arguments are as in new().
146              
147             =head2 remove_data
148              
149             $pop->remove_data(@less_data);
150             $pop->remove_data(\@less_data);
151             $pop->remove_data(\%less_data);
152             $pop->remove_data($another);
153            
154             Remove data from the object. The arguments are as in new().
155             The frequencies of data elements are capped at zero.
156              
157             =head2 copy_data
158              
159             $pop->copy_data($another);
160            
161             Copy all data from another object. The old data is discarded.
162              
163             =head2 clear_data
164              
165             $pop->clear_data();
166            
167             Remove all data from the object.
168              
169             =head1 SEE ALSO
170              
171             For another variability index see
172              
173             L<Statistics::Shannon>
174              
175             For the data manipulation interface see (though the whole
176             interface is documented here)
177              
178             L<Statistics::Frequency>
179              
180             =head1 COPYRIGHT AND LICENSE
181              
182             Copyright (C) 2002-2015, Jarkko Hietaniemi
183              
184             This library is free software; you can redistribute it and/or modify
185             it under the same terms as Perl 5.18.2.
186              
187             =cut
188              
# Compute the Simpson index, 1 / sum(p ** 2), where p is the proportional
# frequency of each element.
#
# Callable in two ways:
#   * as a plain function on "anonymous" data: an array reference whose
#     elements are the frequencies, or a hash reference whose values are
#     the frequencies;
#   * as a method on an object, in which case the proportions come from
#     $self->proportional_frequencies and the result is cached in
#     $self->{simpson}.
#
# Returns the index as a number.  Anonymous data that is empty or whose
# frequencies sum to zero yields 0 (instead of dying on a division by
# zero).  An object without any non-zero proportion yields undef and
# caches nothing.
sub index {
    my ($self) = @_;

    # A plain hash reference carries the frequencies in its values.
    if (ref $self eq 'HASH') {
        $self = [ values %{$self} ];
    }

    if (ref $self eq 'ARRAY') {
        my $total = 0;
        for my $frequency (@{$self}) {
            $total += $frequency;
        }

        # No elements, or only zero frequencies: there are no proportions
        # to compute, so report 0 just like the empty-data case.
        return 0 unless $total;

        my $sum_of_squares = 0;
        for my $frequency (@{$self}) {
            my $proportion = $frequency / $total;
            $sum_of_squares += $proportion * $proportion;
        }
        return $sum_of_squares ? 1 / $sum_of_squares : 0;
    }

    # Object interface: compute only when no cached value is present.
    if (!defined $self->{simpson}) {
        my %proportion_of = $self->proportional_frequencies;

        my $sum_of_squares = 0;
        for my $proportion (values %proportion_of) {
            $sum_of_squares += $proportion * $proportion;
        }

        if ($sum_of_squares) {
            $self->{simpson} = 1 / $sum_of_squares;

            # Register a callback that drops the cached value; presumably
            # the parent class invokes it whenever the data changes
            # (verify against Statistics::Frequency).
            $self->_set_update_callback( sub { delete $_[0]->{simpson} } );
        }
    }

    return $self->{simpson};
}
223              
# Simpson evenness: the Simpson index divided by the number of different
# elements.
#
# Accepts the same kinds of argument as index(): an array reference of
# frequencies, a hash reference whose values are the frequencies, or an
# object providing elements() and index().
#
# Returns the evenness, or undef when there are no elements at all.
sub evenness {
    my ($self) = @_;

    # A plain hash reference carries the frequencies in its values.
    $self = [ values %{$self} ] if ref $self eq 'HASH';

    my $is_anonymous = ref $self eq 'ARRAY';

    # Both alternatives are evaluated in scalar context, giving a count.
    my $kind_count = $is_anonymous ? @{$self} : $self->elements;

    my $evenness;
    if ($kind_count) {
        my $simpson = $is_anonymous
            ? Statistics::Simpson::index($self)
            : $self->index;
        $evenness = $simpson / $kind_count;
    }

    return $evenness;
}
235              
236             1;