File Coverage

blib/lib/Text/NSP/Measures/2D/CHI/x2.pm
Criterion Covered Total %
statement 24 25 96.0
branch 2 2 100.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 32 34 94.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::2D::CHI::x2 - Perl module that implements Pearson's
4             chi squared measure of association for
5             bigrams.
6              
7             =head1 SYNOPSIS
8              
9             =head3 Basic Usage
10              
11             use Text::NSP::Measures::2D::CHI::x2;
12              
13             my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
14              
15             $x2_value = calculateStatistic( n11=>$n11,
16             n1p=>$n1p,
17             np1=>$np1,
18             npp=>$npp);
19              
20             if( ($errorCode = getErrorCode()))
21             {
22             print STDERR $errorCode." - ".getErrorMessage()."\n";
23             }
24             else
25             {
26             print getStatisticName()." value for bigram is ".$x2_value."\n";
27             }
28              
29             =head1 DESCRIPTION
30              
31             Pearson's Chi-squared test measures the deviation between the observed
32             data and what would be expected if word1 and word2 were independent.
33             The higher the score, the less evidence there is in favor of concluding
34             that the words are independent.
35              
36              
37             Assume that the frequency count data associated with a bigram
38             is stored in a 2x2 contingency table:
39              
40                       word2   ~word2
41             word1     n11     n12   | n1p
42             ~word1    n21     n22   | n2p
43                       --------------
44                       np1     np2     npp
45              
46             where n11 is the number of times word1 and word2 occur together, and
47             n12 is the number of times word1 occurs with some word other than
48             word2, and n1p is the number of times in total that word1 occurs as
49             the first word in a bigram.
50              
51             The expected values for the internal cells are calculated by taking the
52             product of their associated marginals and dividing by the sample size,
53             for example:
54              
55                   np1 * n1p
56             m11 = ---------
57                      npp
58              
59             Then the deviation between observed and expected values for each internal
60             cell is computed to arrive at the Pearson's Chi-Squared test value:
61              
62             Pearson's Chi-Squared = ((n11 - m11)^2 / m11) + ((n12 - m12)^2 / m12) +
63             ((n21 - m21)^2 / m21) + ((n22 - m22)^2 / m22)
64              
65              
66             =over
67              
68             =cut
69              
70              
71             package Text::NSP::Measures::2D::CHI::x2;
72              
73              
74 2     2   3835 use Text::NSP::Measures::2D::CHI;
  2         6  
  2         1359  
75 2     2   13 use strict;
  2         4  
  2         171  
76 2     2   12 use Carp;
  2         3  
  2         193  
77 2     2   13 use warnings;
  2         4  
  2         73  
78 2     2   11 no warnings 'redefine';
  2         4  
  2         1278  
79             require Exporter;
80              
81             our ($VERSION, @EXPORT, @ISA);
82              
83             @ISA = qw(Exporter);
84              
85             @EXPORT = qw(initializeStatistic calculateStatistic
86             getErrorCode getErrorMessage getStatisticName);
87              
88             $VERSION = '0.97';
89              
90              
91             =item calculateStatistic() - method to calculate the Chi-squared value.
92              
93             INPUT PARAMS : %values .. Hash (list of key => value pairs, e.g.
94             n11, n1p, np1, npp) containing the count values computed by the
95             count.pl program.
96              
97             RETURN VALUES : $x2 .. x2 value for this bigram; nothing (empty return) if getValues() fails.
98              
99             =cut
100              
101             sub calculateStatistic
102             {
103 15     15   3496 my %values = @_;
104              
105             # getValues() computes the observed and expected values from
106             # the frequency combination values. If it reports failure (a false
107             # result), this sub returns nothing via a bare return, not 0.
108 15 100       62 if( !(Text::NSP::Measures::2D::CHI::getValues(\%values)) ) {
109 10         38 return;
110             }
111             # Sum the four cell terms; $n11..$m22 are not declared here, presumably set by getValues() (TODO confirm)
112 5         8 my $Xsquare = 0;
113              
114 5         17 $Xsquare += Text::NSP::Measures::2D::CHI::computeVal($n11, $m11);
115 5         16 $Xsquare += Text::NSP::Measures::2D::CHI::computeVal($n12, $m12);
116 5         14 $Xsquare += Text::NSP::Measures::2D::CHI::computeVal($n21, $m21);
117 5         13 $Xsquare += Text::NSP::Measures::2D::CHI::computeVal($n22, $m22);
118              
119 5         17 return $Xsquare;
120             }
121              
122              
123              
124             =item getStatisticName() - Returns the name of this statistic
125              
126             INPUT PARAMS : none
127              
128             RETURN VALUES : $name .. Name of the measure (the string "Chi-squared test").
129              
130             =cut
131              
132             sub getStatisticName
133             {
134 0     0     return "Chi-squared test"; # fixed display name of this measure; no arguments are read
135             }
136              
137              
138              
139             1;
140             __END__