File Coverage

blib/lib/Text/NSP/Measures/2D/CHI/phi.pm
Criterion Covered Total %
statement 24 25 96.0
branch 2 2 100.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 32 34 94.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::2D::CHI::phi - Perl module that implements Phi coefficient
4             measure for bigrams.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9              
10             use Text::NSP::Measures::2D::CHI::phi;
11              
12             my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13              
14             $phi_value = calculateStatistic( n11=>$n11,
15             n1p=>$n1p,
16             np1=>$np1,
17             npp=>$npp);
18              
19             if( ($errorCode = getErrorCode()))
20             {
21             print STDERR $errorCode." - ".getErrorMessage()."\n";
22             }
23             else
24             {
25             print getStatisticName()." value for bigram is ".$phi_value."\n";
26             }
27              
28             =head1 DESCRIPTION
29              
30             This function computes the square of the traditional formulation of
31             the Phi Coefficient.
32              
33             Assume that the frequency count data associated with a bigram
34             is stored in a 2x2 contingency table:
35              
36                       word2   ~word2
37             word1     n11      n12 | n1p
38             ~word1    n21      n22 | n2p
39                       --------------
40                       np1      np2   npp
41              
42             where n11 is the number of times word1 and word2 occur together, and
43             n12 is the number of times word1 occurs with some word other than
44             word2, and n1p is the number of times in total that word1 occurs as
45             the first word in a bigram.
46              
47             PHI^2 = ((n11 * n22) - (n12 * n21))^2/(n1p * np1 * np2 * n2p)
48              
49             Note that the value of PHI^2 is equivalent to
50             Pearson's Chi-Squared statistic divided by the sample size, that is:
51              
52             Chi-Squared = npp * PHI^2
53              
54             We use PHI^2 rather than PHI since PHI^2 was employed for collocation
55             identification in:
56              
57             Church, K. (1991) Concordances for Parallel Text, Seventh Annual
58             Conference of the UW Centre for the New OED and Text Research, Oxford,
59             England.
60              
61             =over
62              
63             =cut
64              
65              
66             package Text::NSP::Measures::2D::CHI::phi;
67              
68              
69 1     1   1527 use Text::NSP::Measures::2D::CHI;
  1         3  
  1         234  
70 1     1   6 use strict;
  1         2  
  1         20  
71 1     1   6 use Carp;
  1         2  
  1         54  
72 1     1   4 use warnings;
  1         3  
  1         26  
73 1     1   5 no warnings 'redefine';
  1         1  
  1         312  
74             require Exporter;
75              
76             our ($VERSION, @EXPORT, @ISA);
77              
78             @ISA = qw(Exporter);
79              
80             @EXPORT = qw(initializeStatistic calculateStatistic
81             getErrorCode getErrorMessage getStatisticName);
82              
83             $VERSION = '0.97';
84              
85              
86             =item calculateStatistic() - method to calculate the Phi Coefficient
87              
88             INPUT PARAMS : %values .. Hash (key => value pairs) containing
89             the count values computed by the
90             count.pl program.
91              
92             RETURN VALUES : $phi .. phi value for this bigram.
93              
94             =cut
95              
96             sub calculateStatistic
97             {
98 12     12   951 my %values = @_;
99              
100             # getValues() derives the observed and expected cell values from
101             # the supplied frequency counts. a false result signals an error
102             # or inconsistent counts, so we give up with an empty return.
103 12 100       34 if( !(Text::NSP::Measures::2D::CHI::getValues(\%values)) ) {
104 10         26 return;
105             }
106              
107             # Accumulate computeVal() over the four cells, then divide by npp
108 2         4 my $phi = 0;
109              
110 2         7 $phi += Text::NSP::Measures::2D::CHI::computeVal($n11, $m11);
111 2         6 $phi += Text::NSP::Measures::2D::CHI::computeVal($n12, $m12);
112 2         9 $phi += Text::NSP::Measures::2D::CHI::computeVal($n21, $m21);
113 2         5 $phi += Text::NSP::Measures::2D::CHI::computeVal($n22, $m22);
114              
115 2         8 return $phi/$values{npp};
116             }
117              
118              
119              
120             =item getStatisticName() - Returns the name of this statistic
121              
122             INPUT PARAMS : none
123              
124             RETURN VALUES : $name .. Name of the measure.
125              
126             =cut
127              
128             sub getStatisticName
129             {
130 0     0     return "Phi Coefficient";   # constant label; no arguments are read
131             }
132              
133              
134              
135             1;
136             __END__