File Coverage

blib/lib/Text/NSP/Measures/2D/Fisher/left.pm
Criterion Covered Total %
statement 31 34 91.1
branch 6 8 75.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 43 49 87.7


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::2D::Fisher::left - Perl module implementation of the left sided
4             Fisher's exact test.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9              
10             use Text::NSP::Measures::2D::Fisher::left;
11              
12             my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13              
14             $left_value = calculateStatistic( n11=>$n11,
15             n1p=>$n1p,
16             np1=>$np1,
17             npp=>$npp);
18              
19             if( ($errorCode = getErrorCode()))
20             {
21             print STDERR $errorCode." - ".getErrorMessage();
22             }
23             else
24             {
25             print getStatisticName."value for bigram is ".$left_value;
26             }
27              
28              
29             =head1 DESCRIPTION
30              
31             Assume that the frequency count data associated with a bigram
32             is stored in a 2x2 contingency table:
33              
34             word2 ~word2
35             word1 n11 n12 | n1p
36             ~word1 n21 n22 | n2p
37             --------------
38             np1 np2 npp
39              
40             where n11 is the number of times occur together, and
41             n12 is the number of times occurs with some word other than
42             word2, and n1p is the number of times in total that word1 occurs as
43             the first word in a bigram.
44              
45             The fishers exact tests are calculated by fixing the marginal totals
46             and computing the hypergeometric probabilities for all the possible
47             contingency tables,
48              
49             A left sided test is calculated by adding the probabilities of all
50             the possible two by two contingency tables formed by fixing the
51             marginal totals and changing the value of n11 to less than the given
52             value. A left sided Fisher's Exact Test tells us how likely it is to
53             randomly sample a table where n11 is less than observed. In other words,
54             it tells us how likely it is to sample an observation where the two words
55             are less dependent than currently observed.
56              
57             =head2 Methods
58              
59             =over
60              
61             =cut
62              
63              
64             package Text::NSP::Measures::2D::Fisher::left;
65              
66              
67 1     1   1484 use Text::NSP::Measures::2D::Fisher;
  1         3  
  1         217  
68 1     1   5 use strict;
  1         2  
  1         20  
69 1     1   4 use Carp;
  1         2  
  1         57  
70 1     1   5 use warnings;
  1         2  
  1         114  
71 1     1   6 no warnings 'redefine';
  1         2  
  1         348  
72             require Exporter;
73              
74             our ($VERSION, @EXPORT, @ISA);
75              
76             @ISA = qw(Exporter);
77              
78             @EXPORT = qw(initializeStatistic calculateStatistic
79             getErrorCode getErrorMessage getStatisticName);
80              
81             $VERSION = '0.97';
82              
83              
84             =item calculateStatistic() - This method computes the left sided Fishers
85             exact test.
86              
87             INPUT PARAMS : $count_values .. Reference of an array containing
88             the count values computed by the
89             count.pl program.
90              
91             RETURN VALUES : $left .. Left Fisher value.
92              
93             =cut
94              
95             sub calculateStatistic
96             {
97 12     12   240 my %values = @_;
98 12         19 my $probabilities;
99              
100             # computes and returns the observed and marginal values from
101             # the frequency combination values. returns 0 if there is an
102             # error in the computation or the values are inconsistent.
103 12 100       38 if( !(Text::NSP::Measures::2D::Fisher::getValues(\%values)) )
104             {
105 10         29 return;
106             }
107              
108 2         5 my $final_limit = $n11;
109 2         6 my $n11_start = $n1p + $np1 - $npp;
110 2 100       6 if($n11_start<0)
111             {
112 1         3 $n11_start = 0;
113             }
114              
115 2 50       8 if( !($probabilities = Text::NSP::Measures::2D::Fisher::computeDistribution($n11_start, $final_limit)))
116             {
117 0         0 return;
118             }
119              
120              
121 2         3 my $key_n11;
122              
123 2         5 my $leftfisher=0;
124              
125 2         11 foreach $key_n11 (sort { $a <=> $b } keys %$probabilities)
  27         40  
126             {
127 12 50       43 if($key_n11>$final_limit)
128             {
129 0         0 last;
130             }
131 12         40 $leftfisher += exp($probabilities->{$key_n11});
132             }
133              
134 2         14 return $leftfisher;
135             }
136              
137              
138             =item getStatisticName()
139              
140             Returns the name of this statistic
141              
142             INPUT PARAMS : none
143              
144             RETURN VALUES : $name .. Name of the measure.
145              
146             =cut
147              
148             sub getStatisticName
149             {
150 0     0     return "Left Fisher";
151             }
152              
153              
154              
155             1;
156             __END__