File Coverage

blib/lib/Text/NSP/Measures/2D/Fisher/right.pm

Criterion	Covered	Total	%
statement	44	51	86.2
branch	14	24	58.3
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	64	82	78.0

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::2D::Fisher::right - Perl module implementation of the right sided
4					Fisher's exact test.
5
6					=head1 SYNOPSIS
7
8					=head3 Basic Usage
9
10					use Text::NSP::Measures::2D::Fisher::right;
11
12					my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13
14					$right_value = calculateStatistic( n11=>$n11,
15					n1p=>$n1p,
16					np1=>$np1,
17					npp=>$npp);
18
19					if( ($errorCode = getErrorCode()))
20					{
21					print STDERR $errorCode." - ".getErrorMessage();
22					}
23					else
24					{
25					print getStatisticName."value for bigram is ".$right_value;
26					}
27
28
29					=head1 DESCRIPTION
30
31					Assume that the frequency count data associated with a bigram
32					is stored in a 2x2 contingency table:
33
34					word2 ~word2
35					word1 n11 n12 \| n1p
36					~word1 n21 n22 \| n2p
37					--------------
38					np1 np2 npp
39
40					where n11 is the number of times occur together, and
41					n12 is the number of times occurs with some word other than
42					word2, and n1p is the number of times in total that word1 occurs as
43					the first word in a bigram.
44
45					The fishers exact tests are calculated by fixing the marginal totals
46					and computing the hypergeometric probabilities for all the possible
47					contingency tables,
48
49					A right sided test is calculated by adding the probabilities of all
50					the possible two by two contingency tables formed by fixing the
51					marginal totals and changing the value of n11 to greater than or
52					equal to the given value. A right sided Fisher's Exact Test tells us
53					how likely it is to randomly sample a table where n11 is greater
54					than observed. In other words, it tells us how likely it is to sample
55					an observation where the two words are more dependent than currently
56					observed.
57
58					=head2 Methods
59
60					=over
61
62					=cut
63
64					package Text::NSP::Measures::2D::Fisher::right;
65
66
67	1		1	1387	use Text::NSP::Measures::2D::Fisher;
	1			2
	1			184
68	1		1	4	use strict;
	1			2
	1			16
69	1		1	4	use Carp;
	1			1
	1			42
70	1		1	4	use warnings;
	1			1
	1			35
71	1		1	4	no warnings 'redefine';
	1			2
	1			419
72					require Exporter;
73
74					our ($VERSION, @EXPORT, @ISA);
75
76					@ISA = qw(Exporter);
77
78					@EXPORT = qw(initializeStatistic calculateStatistic
79					getErrorCode getErrorMessage getStatisticName);
80
81					$VERSION = '0.97';
82
83
84					=item calculateStatistic() - This method calculates the right Fisher value
85
86					INPUT PARAMS : $count_values .. Reference of an hash containing
87					the count values computed by the
88					count.pl program.
89
90					RETURN VALUES : $right .. Right Fisher value.
91
92					=cut
93
94					sub calculateStatistic
95					{
96	12		12	167	my %values = @_;
97
98	12			12	my $probabilities;
99	12			14	my $left_flag = 0;
100
101					# computes and returns the observed and marginal values from
102					# the frequency combination values. returns 0 if there is an
103					# error in the computation or the values are inconsistent.
104	12	100		29	if( !(Text::NSP::Measures::2D::Fisher::getValues(\%values)) )
105					{
106	10			21	return;
107					}
108
109	2	50		5	my $final_limit = ($n1p < $np1) ? $n1p : $np1;
110	2			3	my $n11_org = $n11;
111
112	2			4	my $n11_start = $n1p + $np1 - $npp;
113	2	100		5	if($n11_start < $n11)
114					{
115	1			2	$n11_start = $n11;
116					}
117
118
119					# to make the computations faster, we check which would require less computations
120					# computing the leftfisher value and subtracting it from 1 or directly computing
121					# the right fisher value. We do this since, generally for bigrams n11 is quite small
122					# so its much faster to compute the left Fisher value.
123	2			3	my $left_final_limit = $n11-1;
124	2			4	my $left_n11 = $n1p + $np1 - $npp;
125	2	100		8	if($left_n11<0)
126					{
127	1			2	$left_n11 = 0;
128					}
129
130					# if computing the left fisher values first will take lesser amount of time them
131					# we set a flag for later reference and then compute the leftfisher score for
132					# n11-1 and then subtract the total score from one to get the right fisher value.
133	2	50		6	if(($left_final_limit - $left_n11) < ($final_limit - $n11_start))
134					{
135	2			2	$left_flag = 1;
136	2	50		8	if( !($probabilities = Text::NSP::Measures::2D::Fisher::computeDistribution($left_n11, $left_final_limit)))
137					{
138	0			0	return;
139					}
140					}
141
142					#else we compute the value normally and simply sum to get the rightfisher value.
143					else
144					{
145	0	0		0	if( !($probabilities = Text::NSP::Measures::2D::Fisher::computeDistribution($n11_start, $final_limit)))
146					{
147	0			0	return;
148					}
149					}
150
151	2			2	my $key_n11;
152
153	2			3	my $rightfisher=0;
154
155	2			7	foreach $key_n11 (sort { $b <=> $a } keys %$probabilities)
	24			24
156					{
157	11	50		18	if($left_flag)
158					{
159	11	100		19	if($key_n11 >= $n11_org)
160					{
161	1			2	last;
162					}
163					}
164					else
165					{
166	0	0		0	if($key_n11 < $n11_org)
167					{
168	0			0	last;
169					}
170					}
171	10			32	$rightfisher += exp($probabilities->{$key_n11});
172					}
173
174					# if we computed the leftfisher value to get the right fisher value, we subtract
175					# the sum of the probabilities for the tables from one to get the right fisher score.
176	2	50		11	if($left_flag)
177					{
178	2	50		4	if ($rightfisher > 1)
179					{
180	0			0	$rightfisher = 0;
181					}
182					else
183					{
184	2			3	$rightfisher = 1 - $rightfisher;
185					}
186					}
187
188	2			12	return $rightfisher;
189					}
190
191
192					=item getStatisticName() - Returns the name of this statistic
193
194					INPUT PARAMS : none
195
196					RETURN VALUES : $name .. Name of the measure.
197
198					=cut
199
200					sub getStatisticName
201					{
202	0		0		return "Right Fisher";
203					}
204
205
206
207					1;
208					__END__