File Coverage

blib/lib/Text/NSP/Measures/2D/MI/ps.pm
Criterion Covered Total %
statement 21 22 95.4
branch 2 2 100.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 29 31 93.5


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::2D::MI::ps - Perl module that implements Poisson-Stirling
4             measure of association for bigrams.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9              
10             use Text::NSP::Measures::2D::MI::ps;
11              
12             my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13              
14             $ps_value = calculateStatistic( n11=>$n11,
15             n1p=>$n1p,
16             np1=>$np1,
17             npp=>$npp);
18              
19             if( ($errorCode = getErrorCode()))
20             {
21             print STDERR $errorCode." - ".getErrorMessage()."\n";
22             }
23             else
24             {
25             print getStatisticName()." value for bigram is ".$ps_value."\n";
26             }
27              
28             =head1 DESCRIPTION
29              
30             The Poisson-Stirling measure, like the log-likelihood ratio, measures the
31             deviation between the observed data and what would be expected if <word1>
32             and <word2> were independent. The higher the score, the less evidence
33             there is in favor of concluding that the words are independent.
34              
35             Assume that the frequency count data associated with a bigram
36             <word1><word2> is as shown by a 2x2 contingency table:
37              
38                       word2   ~word2
39               word1    n11      n12 | n1p
40              ~word1    n21      n22 | n2p
41                        --------------
42                        np1      np2   npp
43              
44             where n11 is the number of times <word1><word2> occur together, and
45             n12 is the number of times <word1> occurs with some word other than
46             word2, and n1p is the number of times in total that word1 occurs as
47             the first word in a bigram.
48              
49             The expected values for the internal cells are calculated by taking the
50             product of their associated marginals and dividing by the sample size,
51             for example:
52              
53                       np1 * n1p
54                m11 =  ---------
55                          npp
56              
57             The Poisson-Stirling measure is a negative logarithmic approximation
58             of the Poisson-likelihood measure. It uses Stirling's formula to
59             approximate the factorial in the Poisson-likelihood measure.
60              
61             Poisson-Stirling = n11 * ( log(n11) - log(m11) - 1)
62              
63             which is the same as
64              
65             Poisson-Stirling = n11 * ( log(n11/m11) - 1)
66              
67              
68             =head2 Methods
69              
70             =over
71              
72             =cut
73              
74              
75             package Text::NSP::Measures::2D::MI::ps;
76              
77              
78 1     1   3264 use Text::NSP::Measures::2D::MI;
  1         3  
  1         414  
79 1     1   85 use strict;
  1         2  
  1         35  
80 1     1   5 use Carp;
  1         3  
  1         62  
81 1     1   6 use warnings;
  1         2  
  1         66  
82 1     1   6 no warnings 'redefine';
  1         2  
  1         289  
83             require Exporter;
84              
85             our ($VERSION, @EXPORT, @ISA);
86              
87             @ISA = qw(Exporter);
88              
89             @EXPORT = qw(initializeStatistic calculateStatistic
90             getErrorCode getErrorMessage getStatisticName);
91              
92             $VERSION = '0.97';
93              
94             =item calculateStatistic() - This method calculates the ps value
95              
96             INPUT PARAMS : %values .. A hash (list of key => value pairs, e.g.
97             n11, n1p, np1, npp) containing the count values
98             computed by the count.pl program.
99              
100             RETURN VALUES : $poissonStirling .. Poisson-Stirling value for this bigram.
101              
102             =cut
103              
104             sub calculateStatistic
105             {
106 11     11   1917 my %values = @_;
107              
108             # computes and returns the observed and expected values from
109             # the frequency combination values. returns 0 if there is an
110             # error in the computation or the values are inconsistent.
111 11 100       33 if( !(Text::NSP::Measures::2D::MI::getValues(\%values)) ) {
112 10         22 return;
113             }
114              
115             # Now for the actual calculation of Poisson-Stirling!
116 1         2 my $poissonStirling = 0;
117              
118             # n11 * (log(n11/m11) - 1); ($n11 / $m11) must be positive, presumably checked inside computePMI -- TODO confirm
119 1         4 $poissonStirling = $n11 * (Text::NSP::Measures::2D::MI::computePMI($n11,$m11) - 1);
120              
121 1         4 return $poissonStirling;
122             }
123              
124              
125             =item getStatisticName() - Returns the name of this statistic
126              
127             INPUT PARAMS : none
128              
129             RETURN VALUES : $name .. Name of the measure.
130              
131             =cut
132              
133             sub getStatisticName
134             {
135 0     0     return "Poisson-Stirling Measure";
136             }
137              
138              
139              
140             1;
141             __END__