File Coverage

blib/lib/Text/NSP/Measures/2D/Fisher/twotailed.pm
Criterion Covered Total %
statement 33 35 94.2
branch 8 10 80.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 47 52 90.3


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Text::NSP::Measures::2D::Fisher::twotailed - Perl module implementation of the two-sided
4             Fisher's exact test.
5              
6             =head1 SYNOPSIS
7              
8             =head3 Basic Usage
9              
10             use Text::NSP::Measures::2D::Fisher::twotailed;
11              
12             my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13              
14             $twotailed_value = calculateStatistic( n11=>$n11,
15             n1p=>$n1p,
16             np1=>$np1,
17             npp=>$npp);
18              
19             if( ($errorCode = getErrorCode()))
20             {
21             print STDERR $errorCode." - ".getErrorMessage();
22             }
23             else
24             {
25             print getStatisticName()." value for bigram is ".$twotailed_value;
26             }
27              
28             =head1 DESCRIPTION
29              
30             Assume that the frequency count data associated with a bigram
31             is stored in a 2x2 contingency table:
32              
33                       word2   ~word2
34               word1    n11      n12 | n1p
35              ~word1    n21      n22 | n2p
36                        --------------
37                        np1      np2   npp
38              
39             where n11 is the number of times word1 and word2 occur together, and
40             n12 is the number of times word1 occurs with some word other than
41             word2, and n1p is the number of times in total that word1 occurs as
42             the first word in a bigram.
43              
44             Fisher's exact tests are calculated by fixing the marginal totals
45             and computing the hypergeometric probabilities for all the possible
46             contingency tables.
47              
48             A two-tailed Fisher's test is calculated by adding the probabilities of
49             all the contingency tables whose probability is less than or equal to the
50             probability of the observed table. The two-tailed Fisher's test tells us how
51             likely it would be to observe a contingency table that is no more probable
52             than the current table.
53              
54             =head2 Methods
55              
56             =over
57              
58             =cut
59              
60             package Text::NSP::Measures::2D::Fisher::twotailed;
61              
62              
63 1     1   2110 use Text::NSP::Measures::2D::Fisher;
  1         3  
  1         234  
64 1     1   6 use strict;
  1         1  
  1         21  
65 1     1   4 use Carp;
  1         3  
  1         56  
66 1     1   6 use warnings;
  1         2  
  1         30  
67 1     1   4 no warnings 'redefine';
  1         3  
  1         405  
68             require Exporter;
69              
70             our ($VERSION, @EXPORT, @ISA);
71              
72             @ISA = qw(Exporter);
73              
74             @EXPORT = qw(initializeStatistic calculateStatistic
75             getErrorCode getErrorMessage getStatisticName);
76              
77             $VERSION = '0.97';
78              
79              
80             =item calculateStatistic() - This method calculates the twotailed
81             Fisher value
82              
83             INPUT PARAMS : %values .. Hash of named count values (n11, n1p,
84             np1, npp), as computed by the
85             count.pl program.
86              
87             RETURN VALUES : $twotailed .. Twotailed Fisher value.
88              
89             =cut
90              
91             sub calculateStatistic
92             {
93 12     12   1847 my %values = @_;
94              
95 12         14 my $probabilities;
96              
97             # getValues() derives the observed and marginal counts from the
98             # supplied frequency values; a false result means the computation
99             # failed or the counts are inconsistent, so we return nothing.
100 12 100       34 if( !(Text::NSP::Measures::2D::Fisher::getValues(\%values)) )
101             {
102 10         21 return;
103             }
104              
105 2 50       5 my $final_limit = ($n1p < $np1) ? $n1p : $np1; # n11 cannot exceed the smaller marginal total
106              
107 2         3 my $n11_org = $n11;
108 2         5 my $n11_start = $n1p + $np1 - $npp; # smallest n11 consistent with the marginals (clamped to 0 below)
109 2 100       5 if($n11_start<0)
110             {
111 1         2 $n11_start = 0;
112             }
113              
114 2 50       6 if( !($probabilities = Text::NSP::Measures::2D::Fisher::computeDistribution($n11_start, $final_limit)))
115             {
116 0         0 return;
117             }
118              
119 2         3 my $value;
120              
121 2         2 my $ttfisher=0;
122              
123 2         9 foreach $value (sort { $a <=> $b } values %$probabilities)
  91         92  
124             {
125 32 100       56 if($value > $probabilities->{$n11_org}) # skip tables more probable than the observed one
126             {
127 12         15 next;
128             }
129 20         43 $ttfisher += exp($value); # values are presumably log-probabilities, hence exp() -- TODO confirm
130             }
131              
132 2         12 return $ttfisher;
133             }
134              
135              
136             =item getStatisticName() - Returns the name of this statistic
137              
138             INPUT PARAMS : none
139              
140             RETURN VALUES : $name .. Name of the measure.
141              
142             =cut
143              
144             sub getStatisticName
145             {
146 0     0     return "Two Tailed Fisher"; # fixed, human-readable name of this measure
147             }
148              
149              
150              
151             1;
152             __END__