File Coverage

blib/lib/Text/NSP/Measures/2D/Fisher/left.pm

Criterion	Covered	Total	%
statement	31	34	91.1
branch	6	8	75.0
condition			n/a
subroutine	6	7	85.7
pod			n/a
total	43	49	87.7

line	stmt	bran	sub	time	code
1					=head1 NAME
2
3					Text::NSP::Measures::2D::Fisher::left - Perl module implementation of the left sided
4					Fisher's exact test.
5
6					=head1 SYNOPSIS
7
8					=head3 Basic Usage
9
10					use Text::NSP::Measures::2D::Fisher::left;
11
12					my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
13
14					$left_value = calculateStatistic( n11=>$n11,
15					n1p=>$n1p,
16					np1=>$np1,
17					npp=>$npp);
18
19					if( ($errorCode = getErrorCode()))
20					{
21					print STDERR $errorCode." - ".getErrorMessage();
22					}
23					else
24					{
25					print getStatisticName."value for bigram is ".$left_value;
26					}
27
28
29					=head1 DESCRIPTION
30
31					Assume that the frequency count data associated with a bigram
32					is stored in a 2x2 contingency table:
33
34					word2 ~word2
35					word1 n11 n12 \| n1p
36					~word1 n21 n22 \| n2p
37					--------------
38					np1 np2 npp
39
40					where n11 is the number of times occur together, and
41					n12 is the number of times occurs with some word other than
42					word2, and n1p is the number of times in total that word1 occurs as
43					the first word in a bigram.
44
45					The fishers exact tests are calculated by fixing the marginal totals
46					and computing the hypergeometric probabilities for all the possible
47					contingency tables,
48
49					A left sided test is calculated by adding the probabilities of all
50					the possible two by two contingency tables formed by fixing the
51					marginal totals and changing the value of n11 to less than the given
52					value. A left sided Fisher's Exact Test tells us how likely it is to
53					randomly sample a table where n11 is less than observed. In other words,
54					it tells us how likely it is to sample an observation where the two words
55					are less dependent than currently observed.
56
57					=head2 Methods
58
59					=over
60
61					=cut
62
63
64					package Text::NSP::Measures::2D::Fisher::left;
65
66
67	1		1	1484	use Text::NSP::Measures::2D::Fisher;
	1			3
	1			217
68	1		1	5	use strict;
	1			2
	1			20
69	1		1	4	use Carp;
	1			2
	1			57
70	1		1	5	use warnings;
	1			2
	1			114
71	1		1	6	no warnings 'redefine';
	1			2
	1			348
72					require Exporter;
73
74					our ($VERSION, @EXPORT, @ISA);
75
76					@ISA = qw(Exporter);
77
78					@EXPORT = qw(initializeStatistic calculateStatistic
79					getErrorCode getErrorMessage getStatisticName);
80
81					$VERSION = '0.97';
82
83
84					=item calculateStatistic() - This method computes the left sided Fishers
85					exact test.
86
87					INPUT PARAMS : $count_values .. Reference of an array containing
88					the count values computed by the
89					count.pl program.
90
91					RETURN VALUES : $left .. Left Fisher value.
92
93					=cut
94
95					sub calculateStatistic
96					{
97	12		12	240	my %values = @_;
98	12			19	my $probabilities;
99
100					# computes and returns the observed and marginal values from
101					# the frequency combination values. returns 0 if there is an
102					# error in the computation or the values are inconsistent.
103	12	100		38	if( !(Text::NSP::Measures::2D::Fisher::getValues(\%values)) )
104					{
105	10			29	return;
106					}
107
108	2			5	my $final_limit = $n11;
109	2			6	my $n11_start = $n1p + $np1 - $npp;
110	2	100		6	if($n11_start<0)
111					{
112	1			3	$n11_start = 0;
113					}
114
115	2	50		8	if( !($probabilities = Text::NSP::Measures::2D::Fisher::computeDistribution($n11_start, $final_limit)))
116					{
117	0			0	return;
118					}
119
120
121	2			3	my $key_n11;
122
123	2			5	my $leftfisher=0;
124
125	2			11	foreach $key_n11 (sort { $a <=> $b } keys %$probabilities)
	27			40
126					{
127	12	50		43	if($key_n11>$final_limit)
128					{
129	0			0	last;
130					}
131	12			40	$leftfisher += exp($probabilities->{$key_n11});
132					}
133
134	2			14	return $leftfisher;
135					}
136
137
138					=item getStatisticName()
139
140					Returns the name of this statistic
141
142					INPUT PARAMS : none
143
144					RETURN VALUES : $name .. Name of the measure.
145
146					=cut
147
148					sub getStatisticName
149					{
150	0		0		return "Left Fisher";
151					}
152
153
154
155					1;
156					__END__