File Coverage

blib/lib/Text/NSP/Measures/3D/MI/ll.pm
Criterion Covered Total %
statement 28 29 96.5
branch 2 2 100.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 36 38 94.7


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::3D::MI::ll - Perl module that implements Loglikelihood
4             measure of association for trigrams.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9              
10             use Text::NSP::Measures::3D::MI::ll;
11              
12             $ll_value = calculateStatistic( n111=>10,
13             n1pp=>40,
14             np1p=>45,
15             npp1=>42,
16             n11p=>20,
17             n1p1=>23,
18             np11=>21,
19             nppp=>100);
20              
21             if( ($errorCode = getErrorCode()))
22             {
23             print STDERR $errorCode." - ".getErrorMessage()."\n";
24             }
25             else
26             {
27             print getStatisticName()." value for trigram is ".$ll_value."\n";
28             }
29              
30              
31             =head1 DESCRIPTION
32              
33             The log-likelihood ratio measures the deviation between the observed data
34             and what would be expected if the three words of the trigram were independent.
35             The higher the score, the less evidence there is in favor of concluding that
36             the words are independent.
37              
38             The expected values for the internal cells are calculated by taking the
39             product of their associated marginals and dividing by the square of the
40             sample size, for example:
41              
42                      n1pp * np1p * npp1
43             m111 = --------------------
44                          nppp ** 2
45              
46             Then the deviation between observed and expected values for each internal
47             cell is computed to arrive at the log-likelihood value.
48              
49             Log-Likelihood = 2 * [n111 * log(n111/m111) + n112 * log(n112/m112) +
50             n121 * log(n121/m121) + n122 * log(n122/m122) +
51             n211 * log(n211/m211) + n212 * log(n212/m212) +
52             n221 * log(n221/m221) + n222 * log(n222/m222)]
53              
54             =over
55              
56             =cut
57              
58              
59             package Text::NSP::Measures::3D::MI::ll;
60              
61              
62 2     2   13791 use Text::NSP::Measures::3D::MI;
  2         7  
  2         761  
63 2     2   13 use strict;
  2         4  
  2         113  
64 2     2   15 use Carp;
  2         3  
  2         129  
65 2     2   11 use warnings;
  2         4  
  2         72  
66 2     2   11 no warnings 'redefine';
  2         4  
  2         1856  
67             require Exporter;
68              
69             our ($VERSION, @EXPORT, @ISA);
70              
71             @ISA = qw(Exporter);
72              
73             @EXPORT = qw(initializeStatistic calculateStatistic
74             getErrorCode getErrorMessage getStatisticName);
75              
76             $VERSION = '0.97';
77              
78             =item calculateStatistic(%count_values) - This method calculates
79             the ll value
80              
81             INPUT PARAMS : %count_values .. Hash (key => value list) containing
82             the count values computed by the
83             count.pl program.
84              
85             RETURN VALUES : $loglikelihood .. Loglikelihood value for this trigram.
86              
87             =cut
88              
89             sub calculateStatistic
90             {
91 19     19   4928 my %values = @_;   # named counts passed as a flat key => value list (n111, n1pp, ..., nppp)
92              
93             # getValues() computes and sets the observed ($nXYZ) and expected ($mXYZ)
94             # cell values from the supplied counts. A false result means an error in
95             # the computation or inconsistent counts; bail out with a bare return.
96 19 100       71 if( !(Text::NSP::Measures::3D::MI::getValues(\%values)) ) {
97 15         34 return;
98             }
99              
100             # Accumulate n * computePMI(n, m) over the eight cells; doubled on return.
101 4         8 my $logLikelihood = 0;
102              
103             # don't want ($nxyz / $mxyz) to be 0 or less! presumably computePMI() flags that -- TODO confirm
104 4         16 $logLikelihood += $n111 * Text::NSP::Measures::3D::MI::computePMI( $n111, $m111 );
105 4         13 $logLikelihood += $n112 * Text::NSP::Measures::3D::MI::computePMI( $n112, $m112 );
106 4         12 $logLikelihood += $n121 * Text::NSP::Measures::3D::MI::computePMI( $n121, $m121 );
107 4         11 $logLikelihood += $n122 * Text::NSP::Measures::3D::MI::computePMI( $n122, $m122 );
108 4         12 $logLikelihood += $n211 * Text::NSP::Measures::3D::MI::computePMI( $n211, $m211 );
109 4         12 $logLikelihood += $n212 * Text::NSP::Measures::3D::MI::computePMI( $n212, $m212 );
110 4         17 $logLikelihood += $n221 * Text::NSP::Measures::3D::MI::computePMI( $n221, $m221 );
111 4         12 $logLikelihood += $n222 * Text::NSP::Measures::3D::MI::computePMI( $n222, $m222 );
112              
113 4         19 return ( 2 * $logLikelihood );
114             }
115              
116              
117             =item getStatisticName() - Returns the name of this statistic
118              
119             INPUT PARAMS : none
120              
121             RETURN VALUES : $name .. Name of the measure.
122              
123             =cut
124              
125             sub getStatisticName
126             {
127 0     0     return "Loglikelihood";   # fixed display name of this measure; takes no arguments
128             }
129              
130              
131              
132             1;
133             __END__