File Coverage

blib/lib/Text/NSP/Measures/2D/odds.pm
Criterion Covered Total %
statement 24 27 88.8
branch 6 8 75.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 36 42 85.7


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::2D::odds - Perl module to compute the Odds
4             ratio for bigrams.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9              
10             use Text::NSP::Measures::2D::odds;
11              
12             my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13              
14             $odds_value = calculateStatistic( n11=>$n11,
15             n1p=>$n1p,
16             np1=>$np1,
17             npp=>$npp);
18              
19             if( ($errorCode = getErrorCode()))
20             {
21             print STDERR $errorCode." - ".getErrorMessage()."\n";
22             }
23             else
24             {
25             print getStatisticName()." value for bigram is ".$odds_value."\n";
26             }
27              
28              
29              
30             =head1 DESCRIPTION
31              
32             Assume that the frequency count data associated with a bigram
33             is stored in a 2x2 contingency table:
34              
35                       word2   ~word2
36             word1     n11      n12 | n1p
37             ~word1    n21      n22 | n2p
38                       --------------
39                       np1      np2   npp
40              
41             where n11 is the number of times word1 and word2 occur together, and
42             n12 is the number of times word1 occurs with some word other than
43             word2, and n1p is the number of times in total that word1 occurs as
44             the first word in a bigram.
45              
46             The odds ratio computes the ratio of the number of times that
47             the words in a bigram occur together (or not at all) to the
48             number of times the words occur individually. It is the ratio of
49             the product of the diagonal cells to that of the off-diagonal cells.
50              
51             Thus, ODDS RATIO = (n11*n22)/(n21*n12)
52              
53             If n21 and/or n12 is 0, then each zero value is "smoothed" to one to
54             avoid a zero in the denominator.
55              
56             =over
57              
58             =cut
59              
60              
61             package Text::NSP::Measures::2D::odds;
62              
63              
64 1     1   1374 use Text::NSP::Measures::2D;
  1         3  
  1         183  
65 1     1   4 use strict;
  1         2  
  1         17  
66 1     1   4 use Carp;
  1         2  
  1         49  
67 1     1   4 use warnings;
  1         2  
  1         22  
68 1     1   9 no warnings 'redefine';
  1         1  
  1         216  
69             require Exporter;
70              
71             our ($VERSION, @EXPORT, @ISA);
72              
73             @ISA = qw(Exporter);
74              
75             @EXPORT = qw(initializeStatistic calculateStatistic
76             getErrorCode getErrorMessage getStatisticName);
77              
78             $VERSION = '0.97';
79              
80              
81             =item calculateStatistic() - method to calculate the odds ratio value!
82              
83             INPUT PARAMS : %values .. Hash of count values (n11, n1p, np1,
84             npp), as computed by the count.pl program,
85             passed as key => value pairs.
86              
87             RETURN VALUES : $odds .. Odds ratio for this bigram.
88              
89             =cut
90              
91             sub calculateStatistic
92             {
93 28     28   379 my %values = @_;
94              
95             # Derive the marginal totals from the supplied counts. If
96             # computeMarginalTotals() reports failure (presumably missing or
97             # inconsistent counts), give up with a bare return.
98 28 100       70 if(!(Text::NSP::Measures::2D::computeMarginalTotals(\%values)) ){
99 5         12 return;
100             }
101              
102             # Derive the observed cell values; presumably this fills in the
103             # $n11/$n12/$n21/$n22 variables used below, which are not declared
104             # in this sub. On failure this path returns 0, not a bare return.
105 23 100       55 if( !(Text::NSP::Measures::2D::computeObservedValues(\%values)) ) {
106 5         11 return(0);
107             }
108              
109             # Replace a zero off-diagonal count with 1 so the division below cannot divide by zero.
110              
111 18 50       35 if ($n21 == 0)
112             {
113 0         0 $n21 = 1;
114             }
115 18 50       32 if ($n12 == 0)
116             {
117 0         0 $n12 = 1;
118             }
119             # Odds ratio: product of the diagonal cells over product of the off-diagonal cells.
120 18         32 my $odds = (($n11*$n22) / ($n12*$n21));
121              
122 18         51 return ($odds);
123             }
124              
125              
126              
127             =item getStatisticName() - Returns the name of this statistic
128              
129             INPUT PARAMS : none
130              
131             RETURN VALUES : $name .. Name of the measure.
132              
133             =cut
134              
135             sub getStatisticName
136             {
137 0     0     return "Odds Ratio";  # fixed display name of this measure; no arguments are read
138             }
139              
140              
141              
142             1;
143             __END__