File Coverage

blib/lib/MyBioinfo/Math.pm
Criterion Covered Total %
statement 18 70 25.7
branch 0 14 0.0
condition n/a
subroutine 6 10 60.0
pod 0 4 0.0
total 24 98 24.4


line stmt bran cond sub pod time code
1             package MyBioinfo::Math;
2              
3 1     1   34 use 5.006;
  1         4  
  1         43  
4 1     1   6 use strict;
  1         1  
  1         34  
5 1     1   8 use warnings;
  1         4  
  1         1132  
6 1     1   7 use Math::CDF qw(pchisq);
  1         3  
  1         54  
7 1     1   6 use Data::Dumper;
  1         2  
  1         67  
8 1     1   5 use MyBioinfo::Common qw(sum);
  1         2  
  1         991  
9             require Exporter;
10              
11             our @ISA = qw(Exporter);
12              
13             # Items to export into callers namespace by default. Note: do not export
14             # names by default without a very good reason. Use EXPORT_OK instead.
15             # Do not simply export all your public functions/methods/constants.
16              
17             # This allows declaration use MyBioinfo::Common ':all';
18             # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
19             # will save memory.
20             our %EXPORT_TAGS = ( 'all' => [ qw(
21             gtest_gof chisqtest_gof
22             ) ] );
23              
24             our @EXPORT_OK = @{ $EXPORT_TAGS{'all'} };
25              
26             our @EXPORT = ();
27              
28             our $VERSION = '0.11';
29              
30             ############### CHANGES: ###################
31             # Mar. 1 2012 15:25 Added protection in G-test when round-off error causes
32             # G-statistic to be a small negative.
33             #
34              
35             ######## Preloaded methods go here. ##############
36              
37             # Use G-test to perform Goodness-of-fit test.
38             sub gtest_gof{
39 0     0 0   my $ra = shift; # reference to array of numbers.
40 0           my $rp; # reference to array of probabilities.
41             # If vector p is not specified, assume uniform probability.
42 0 0         if(@_ > 0){
43 0           $rp = shift;
44 0 0         if(!&p_sanity($rp)){
45 0           die "Probability vector contains elements that are zero or negative. G-test was not performed. Abort.\n";
46             }
47             }else{
48 0           $rp = [ (1/@{$ra}) x @{$ra} ];
  0            
  0            
49             }
50             # Fill out the vector of expectated values.
51 0           my @a_exp = fill_exp($ra, $rp);
52             # Calcualte G-statistic.
53 0           my $g_stat = 0;
54 0           for(my $i = 0; $i < @a_exp; $i++){
55 0 0         if($ra->[$i] != 0){ # skip zero entry in observation vector.
56 0           $g_stat += $ra->[$i] * log( $ra->[$i] / $a_exp[$i] );
57             }
58             }
59 0           $g_stat *= 2.0;
60             # Use Chi-square distribution to calculate p-value.
61 0           my $df = @{$ra} - 1;
  0            
62 0           my $p; # p-value.
63 0 0         if($g_stat <= 0){ # this may happen due to round-off errors.
64 0           $p = 1;
65             }else{
66 0           $p = 1 - pchisq($g_stat, $df);
67             }
68 0           return ($g_stat, $df, $p);
69             }
70              
71             # Use Chi-square test to perform Goodness-of-fit test.
72             sub chisqtest_gof{
73 0     0 0   my $ra = shift; # reference to array of numbers.
74 0           my $rp; # reference to array of probabilities.
75             # If vector p is not specified, assume uniform probability.
76 0 0         if(@_ > 0){
77 0           $rp = shift;
78 0 0         if(!&p_sanity($rp)){
79 0           die "Probability vector contains elements that are zero or negative. Chi-square test was not performed. Abort.\n";
80             }
81             }else{
82 0           $rp = [ (1/@{$ra}) x @{$ra} ];
  0            
  0            
83             }
84             # Fill out the vector of expectated values.
85 0           my @a_exp = fill_exp($ra, $rp);
86             # Calcualte Chisq-statistic.
87 0           my $ch_stat = 0;
88 0           for(my $i = 0; $i < @a_exp; $i++){
89 0           $ch_stat += ( $ra->[$i] - $a_exp[$i] ) ** 2 / $a_exp[$i];
90             }
91             # Use Chi-square distribution to calculate p-value.
92 0           my $df = @{$ra} - 1;
  0            
93 0           my $p = 1 - pchisq($ch_stat, $df);
94 0           return ($ch_stat, $df, $p);
95             }
96              
97             # Fill out a vector of expected values. A common subroutine used by both G-test
98             # and Chi-square test.
99             sub fill_exp{
100 0     0 0   my($ra, $rp) = @_;
101 0           my $v_sum = sum(@{$ra});
  0            
102 0           my @a_exp;
103 0           for(my $i = 0; $i < @{$ra}; $i++){
  0            
104 0           push @a_exp, $v_sum * $rp->[$i];
105             }
106 0           return @a_exp;
107             }
108              
109             # Check the sanity for probability vector: all elements should be larger than zero.
110             sub p_sanity{
111 0     0 0   my $p_ref = shift;
112 0           foreach my $p(@{$p_ref}){
  0            
113 0 0         return 0 if $p <= 0;
114             }
115 0           return 1;
116             }
117              
118              
119              
120             1;
121             __END__