File Coverage

blib/lib/Text/NSP/Measures/2D/Fisher/twotailed.pm

Criterion	Covered	Total	%
statement	33	35	94.2
branch	8	10	80.0
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	47	52	90.3

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::2D::Fisher::twotailed - Perl module implementation of the two-sided
4					Fisher's exact test.
5
6					=head1 SYNOPSIS
7
8					=head3 Basic Usage
9
10					use Text::NSP::Measures::2D::Fisher::twotailed;
11
12					my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13
14					$twotailed_value = calculateStatistic( n11=>$n11,
15					n1p=>$n1p,
16					np1=>$np1,
17					npp=>$npp);
18
19					if( ($errorCode = getErrorCode()))
20					{
21					print STDERR $errorCode." - ".getErrorMessage();
22					}
23					else
24					{
25					print getStatisticName()." value for bigram is ".$twotailed_value;
26					}
27
28					=head1 DESCRIPTION
29
30					Assume that the frequency count data associated with a bigram
31					is stored in a 2x2 contingency table:
32
33					          word2   ~word2
34					 word1    n11     n12  | n1p
35					~word1    n21     n22  | n2p
36					          --------------
37					          np1     np2    npp
38
39					where n11 is the number of times word1 and word2 occur together, and
40					n12 is the number of times word1 occurs with some word other than
41					word2, and n1p is the number of times in total that word1 occurs as
42					the first word in a bigram.
43
44					Fisher's exact tests are calculated by fixing the marginal totals
45					and computing the hypergeometric probabilities for all the possible
46					contingency tables.
47
48					A twotailed Fisher's test is calculated by adding the probabilities of
49					all the contingency tables whose probability is less than or equal to
50					the probability of the observed table. The twotailed Fisher's test tells
51					us how likely it would be to observe a contingency table that is no more
52					probable than the current table.
53
54					=head2 Methods
55
56					=over
57
58					=cut
59
60					package Text::NSP::Measures::2D::Fisher::twotailed;
61
62
63	1		1	1890	use Text::NSP::Measures::2D::Fisher;
	1			3
	1			417
64	1		1	6	use strict;
	1			2
	1			27
65	1		1	5	use Carp;
	1			2
	1			58
66	1		1	5	use warnings;
	1			1
	1			22
67	1		1	4	no warnings 'redefine';
	1			2
	1			276
68					require Exporter;
69
70					our ($VERSION, @EXPORT, @ISA);
71
72					@ISA = qw(Exporter);
73
74					@EXPORT = qw(initializeStatistic calculateStatistic
75					getErrorCode getErrorMessage getStatisticName);
76
77					$VERSION = '0.97';
78
79
80					=item calculateStatistic() - This method calculates the twotailed
81					Fisher value
82
83					INPUT PARAMS : %values .. Count values passed as key/value
84					pairs (n11, n1p, np1, npp), as shown
85					in the SYNOPSIS.
86
87					RETURN VALUES : $twotailed .. Twotailed Fisher value; nothing (undef in scalar context) on error.
88
89					=cut
90
91					sub calculateStatistic   # Two-tailed Fisher's exact test value for one 2x2 table of counts.
92					{
93	12		12	2495	my %values = @_;   # arguments arrive as key/value pairs (e.g. n11 => ..., n1p => ...)
94
95	12			19	my $probabilities;   # will hold the hash ref returned by computeDistribution()
96
97					# Validate the input and derive the observed and marginal counts
98					# from the supplied frequency values. getValues() returns a false
99					# value if the computation fails or the counts are inconsistent.
100	12	100		37	if( !(Text::NSP::Measures::2D::Fisher::getValues(\%values)) )
101					{
102	10			27	return;   # bare return: undef in scalar context, empty list in list context
103					}
104
105	2	50		8	my $final_limit = ($n1p < $np1) ? $n1p : $np1;   # largest n11 considered: the smaller of the two marginals
106
107	2			3	my $n11_org = $n11;   # observed n11, used below to look up the observed table's entry
108	2			5	my $n11_start = $n1p + $np1 - $npp;   # smallest n11 considered, before clamping
109	2	100		6	if($n11_start<0)
110					{
111	1			3	$n11_start = 0;   # a cell count cannot be negative
112					}
113					# NOTE(review): $n11, $n1p, $np1 and $npp are not declared in this sub; presumably package variables exported by Text::NSP::Measures::2D::Fisher and set by getValues() - confirm.
114	2	50		8	if( !($probabilities = Text::NSP::Measures::2D::Fisher::computeDistribution($n11_start, $final_limit)))
115					{
116	0			0	return;   # distribution could not be computed; same "nothing" result as above
117					}
118
119	2			3	my $value;
120
121	2			2	my $ttfisher=0;   # running sum that becomes the returned statistic
122
123	2			13	foreach $value (sort { $a <=> $b } values %$probabilities)   # visit the entries in ascending numeric order
	97			100
124					{
125	32	100		72	if($value > $probabilities->{$n11_org})   # skip tables whose entry exceeds that of the observed table
126					{
127	12			15	next;
128					}
129	20			47	$ttfisher += exp($value);   # entries are exponentiated before summing - presumably log-probabilities; confirm in computeDistribution()
130					}
131
132	2			18	return $ttfisher;   # sum over all tables whose entry is <= the observed table's entry
133					}
134
135
136					=item getStatisticName() - Returns the name of this statistic
137
138					INPUT PARAMS : none
139
140					RETURN VALUES : $name .. Name of the measure.
141
142					=cut
143
144					sub getStatisticName   # Returns the display name of this measure; any arguments are ignored.
145					{
146	0		0		return "Two Tailed Fisher";   # constant string with no trailing space
147					}
148
149
150
151					1;
152					__END__