File Coverage

blib/lib/Text/NSP/Measures/2D/CHI.pm

Criterion	Covered	Total	%
statement	32	45	71.1
branch	18	24	75.0
condition			n/a
subroutine	6	6	100.0
pod	2	2	100.0
total	58	77	75.3

line	stmt	bran	sub	pod	time	code
1						=head1 NAME
2
3						Text::NSP::Measures::2D::CHI - Perl module that provides error checks
4						for the Pearson's chi squared, phi coefficient
5						and the Tscore measures.
6
7						=head1 SYNOPSIS
8
9						=head3 Basic Usage
10
11						use Text::NSP::Measures::2D::CHI::x2;
12
13						my $npp = 60; my $n1p = 20; my $np1 = 20; my $n11 = 10;
14
15						$x2_value = calculateStatistic( n11=>$n11,
16						n1p=>$n1p,
17						np1=>$np1,
18						npp=>$npp);
19
20						if( ($errorCode = getErrorCode()))
21						{
22						print STDERR $errorCode." - ".getErrorMessage()."\n";
23						}
24						else
25						{
26						print getStatisticName()." value for bigram is ".$x2_value."\n";
27						}
28
29						=head1 DESCRIPTION
30
31						This module is the base class for the Chi-squared, Phi coefficient and
32						T-score measures. It provides error checks specific to these measures
33						and also implements the computations that are common to them.
34
35						=over
36
37						=item Pearson's Chi-Squared
38
39						x2 = (n11 - m11)^2/m11 + (n12 - m12)^2/m12 +
40						(n21 - m21)^2/m21 + (n22 - m22)^2/m22
41
42						=item Phi Coefficient
43
44						PHI^2 = ((n11 * n22) - (n12 * n21))^2/(n1p * np1 * np2 * n2p)
45
46						=item T-Score
47
48						tscore = (n11 - m11)/sqrt(n11)
49
50						=back
51
52						Note that the value of PHI^2 is equivalent to
53						Pearson's Chi-Squared statistic divided by the sample size, that is:
54
55						Chi-Squared = npp * PHI^2
56
57						Although T-score seems quite different from the other two measures we
58						have put it in the CHI family because like the other two measures it
59						uses the difference between the observed and expected values and is also
60						quite similar in ranking the bigrams.
61
62						=over
63
64						=cut
65
66
67						package Text::NSP::Measures::2D::CHI;
68
69
70	5		5		4573	use Text::NSP::Measures::2D;
	5				14
	5				1394
71	5		5		29	use strict;
	5				10
	5				144
72	5		5		25	use Carp;
	5				9
	5				377
73	5		5		1091	use warnings;
	5				12
	5				3639
74						# use subs(calculateStatistic);
75						require Exporter;
76
77						our ($VERSION, @EXPORT, @ISA);
78
79						@ISA = qw(Exporter);
80
81						@EXPORT = qw(initializeStatistic calculateStatistic
82						getErrorCode getErrorMessage getStatisticName
83						$n11 $n12 $n21 $n22 $m11 $m12 $m21 $m22
84						$npp $np1 $np2 $n2p $n1p $errorCodeNumber
85						$errorMessage);
86
87						$VERSION = '1.03';
88
89						=item getValues() - This method calls the computeMarginalTotals(),
90						computeObservedValues() and the computeExpectedValues() methods to
91						compute the observed and expected values. It checks these values for
92						any errors that might cause the PHI and x2 measures to fail.
93
94						INPUT PARAMS : $count_values .. Reference to a hash containing
95						the count values computed by the
96						count.pl program.
97
98						RETURN VALUES : 1/undef ..returns '1' to indicate success
99						and an undefined(NULL) value to indicate
100						failure.
101
102						=cut
103
104						sub getValues  # Runs the 2D marginal/observed/expected helpers on $values, then rejects tables where a non-zero observed cell has a zero expected value.
105						{
106	55		55	1	80	my ($values)=@_;  # count values; passed through unchanged to the three compute* helpers below
107
108	55	100			147	if(!(Text::NSP::Measures::2D::computeMarginalTotals($values)) ) {
109	15				50	return;  # bare return: undef in scalar context, empty list in list context
110						}
111
112	40	100			103	if( !(Text::NSP::Measures::2D::computeObservedValues($values)) ) {
113	15				49	return;
114						}
115
116	25	50			67	if( !(Text::NSP::Measures::2D::computeExpectedValues($values)) ) {
117	0				0	return;
118						}
119
120						# For every cell with a non-zero observed count, an expected value of zero is flagged as error 221 and aborts (presumably because the measures divide by the expected value).
121	25	50			57	if ( $n11 )  # $nXY/$mXY are not declared in this sub; presumably package variables shared with Text::NSP::Measures::2D -- confirm
122						{
123	25	50			63	if ($m11 == 0)
124						{
125	0				0	$errorMessage = "Expected value in cell (1,1) must not be zero";
126	0				0	$errorCodeNumber = 221;
127	0				0	return;
128						}
129						}
130	25	100			61	if ( $n12 )
131						{
132	22	50			44	if ($m12 == 0)
133						{
134	0				0	$errorMessage = "Expected value in cell (1,2) must not be zero";
135	0				0	$errorCodeNumber = 221;
136	0				0	return;
137						}
138						}
139	25	100			54	if ( $n21 )
140						{
141	24	50			109	if ($m21 == 0)
142						{
143	0				0	$errorMessage = "Expected value in cell (2,1) must not be zero";
144	0				0	$errorCodeNumber = 221;
145	0				0	return;
146						}
147						}
148	25	100			57	if ( $n22 )
149						{
150	20	50			68	if ($m22 == 0)
151						{
152	0				0	$errorMessage = "Expected value in cell (2,2) must not be zero";
153	0				0	$errorCodeNumber = 221;
154	0				0	return;
155						}
156						}
157						# All helper calls succeeded and no zero expected value was found, so report success.
158	25				85	return 1;
159						}
160
161
162
163
164						=item computeVal() - Computes the deviation in observed value with respect
165						to the expected values
166
167						INPUT PARAMS : $n ..Observed value
168						$m ..Expected value
169
170						RETURN VALUES : (n-m)^2/m ..the squared deviation of the
171						observed value from the expected
172						value, divided by the expected value (0 if the expected value is zero).
173
174						=cut
175
176						sub computeVal  # Returns (n - m)^2 / m for one cell, or 0 when m is false (zero or undefined).
177						{
178	28		28	1	31	my $n = shift;  # first argument: observed value
179	28				31	my $m = shift;  # second argument: expected value
180	28	100			43	if($m)  # only divide when the expected value is non-zero
181						{
182	23				98	return (($n-$m)**2)/$m;
183						}
184						else
185						{
186	5				11	return 0;  # avoids a division by zero; the cell contributes nothing
187						}
188						}
189
190
191
192						1;
193						__END__