File Coverage

blib/lib/Statistics/Candidates.pm
Criterion Covered Total %
statement 70 73 95.8
branch 13 20 65.0
condition 3 9 33.3
subroutine 10 11 90.9
pod 4 5 80.0
total 100 118 84.7


line stmt bran cond sub pod time code
1             package Statistics::Candidates;
2              
3             ##---------------------------------------------------------------------------##
4             ## Author:
5             ## Hugo WL ter Doest terdoest@cs.utwente.nl
6             ## Description:
7             ## Object/methods for candidate features
8             ##
9             ##---------------------------------------------------------------------------##
10             ## Copyright (C) 1998 Hugo WL ter Doest terdoest@cs.utwente.nl
11             ##
12             ## This library is free software; you can redistribute it and/or modify
13             ## it under the terms of the GNU General Public License as published by
14             ## the Free Software Foundation; either version 2 of the License, or
15             ## (at your option) any later version.
16             ##
17             ## This library is distributed in the hope that it will be useful,
18             ## but WITHOUT ANY WARRANTY; without even the implied warranty of
19             ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20             ## GNU General Public License for more details.
21             ##
22             ## You should have received a copy of the GNU Library General Public
23             ## License along with this program; if not, write to the Free Software
24             ## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25             ##---------------------------------------------------------------------------##
26              
27              
28             ##---------------------------------------------------------------------------##
29             ## Globals
30             ##---------------------------------------------------------------------------##
31 2         143 use vars qw($VERSION
32             @ISA
33             @EXPORT
34 2     2   1617 $VECTOR_PACKAGE);
  2         5  
35              
36              
37             ##---------------------------------------------------------------------------##
38             ## Require libraries
39             ##---------------------------------------------------------------------------##
40 2     2   14 use strict;
  2         4  
  2         82  
41 2     2   9 use diagnostics -verbose;
  2         4  
  2         18  
42 2     2   53 use Carp;
  2         4  
  2         109  
43 2     2   10 use Statistics::SparseVector;
  2         5  
  2         1739  
44             $VECTOR_PACKAGE = "Statistics::SparseVector";
45             require Exporter;
46             require AutoLoader;
47              
48             @ISA = qw(Exporter AutoLoader);
49             # Items to export into callers namespace by default. Note: do not export
50             # names by default without a very good reason. Use EXPORT_OK instead.
51             # Do not simply export all your public functions/methods/constants.
52             @EXPORT = qw(
53             );
54              
55              
56             sub new {
57 1     1 1 13 my($this, $arg) = @_;
58              
59             # for calling $self->new($someth):
60 1   33     9 my $class = ref($this) || $this;
61 1         3 my $self = {};
62 1         4 bless $self, $class;
63 1 50       4 if ($arg) {
64 1         8 $self->read($arg);
65             }
66 1         5 return($self);
67             }
68              
69              
70 0     0   0 sub DESTROY {
71              
72             }
73              
74              
75             # reads a candidates file
76             # dies if insufficient events or inconsistent lines
77             # syntax first line: ...
78             # syntax other lines:
79             sub read {
80 1     1 0 208 my($self, $file) = @_;
81              
82 1         1 my($features,
83             $sum,
84             $event,
85             $candidate_names);
86              
87             # prologue
88 1 50       56 open(CANDS, $file) ||
89             die "Could not open $file\n";
90 1         6 print "Opened $file\n";
91              
92             # read candidate names, skip comments
93 1         2 $candidate_names = "";
94 1         2 do {
95 1         25 $candidate_names = ;
96             } until ($candidate_names !~ /\#.*/);
97 1         3 chomp $candidate_names;
98 1         18 $self->{CANDIDATE_NAMES} = [split(/\t/,$candidate_names)];
99              
100             # read the candidate bitvectors
101 1         3 $self->{NR_CANDIDATES} = 0;
102 1         3 $self->{NR_CLASSES} = 0;
103 1         4 while () {
104 100 50       217 if (!/\#.*/) {
105 100         113 chomp;
106 100         115 $features = $_;
107 100 100       166 if ($self->{NR_CLASSES} == 0) {
108 1         3 $self->{NR_CANDIDATES} = length($features);
109             }
110             else {
111 99 50       205 if ($self->{NR_CANDIDATES} != length($features)) {
112 0         0 croak "Candidate file corrupt ".
113             "(line $self->{NR_CLASSES} has insufficient features)\n";
114             }
115             }
116 100         301 $self->{CANDIDATES}[$self->{NR_CLASSES}++] =
117             $VECTOR_PACKAGE->new_Bin($self->{NR_CANDIDATES}, $features);
118             }
119             }
120              
121             # epilogue
122 1         12 close(CANDS);
123             # check the candidates for constant functions
124 1         6 $self->check();
125 1         7 print "Read $self->{NR_CANDIDATES} candidates for $self->{NR_CLASSES} events; ";
126 1         5 print "closed $file\n";
127             }
128              
129              
130             # check whether for all features f, \sum_x f(x) > 0, and
131             # \sum_x f(x) != nr_classes
132             sub check {
133 1     1 1 3 my($self) = @_;
134              
135 1         1 my($x,
136             $f,
137             $sum);
138              
139 1         11 for ($f = 0; $f < $self->{NR_CANDIDATES}; $f++) {
140 3         5 $sum = 0;
141 3         9 for ($x = 0; $x < $self->{NR_CLASSES}; $x++) {
142 300         776 $sum += $self->{CANDIDATES}[$x]->bit_test($f);
143             }
144 3 50 33     24 if (!$sum || ($sum == $self->{NR_CLASSES})) {
145 0         0 croak "Candidate ",$f+1, " is constant, remove it\n";
146             }
147             }
148             }
149              
150              
151             # writes remaining candidates to a file
152             # syntax: same as input candidates file
153             sub write {
154 1     1 1 15629 my($self, $file) = @_;
155              
156 1         1082 my($x,
157             $f);
158              
159 1 50 33     910 if (($self->{NR_CANDIDATES} > 0) && ($self->{NR_CLASSES})) {
160 1 50       209 open(CANDIDATES,">$file") ||
161             die "Could not open $file\n";
162 1         12 print "Opened $file\n";
163              
164             # write the list of candidate names that were not added
165 1         13 for ($f = 0; $f < $self->{NR_CANDIDATES}; $f++) {
166 3 100       31 if (!$self->{ADDED}{$f}) {
167 1         18 print CANDIDATES "$self->{CANDIDATE_NAMES}[$f]\t";
168             }
169             }
170 1         1862 print CANDIDATES "\n";
171              
172             # write candidates that were not added
173 1         16 for ($x = 0; $x < $self->{NR_CLASSES}; $x++) {
174 100         310 for ($f = 0; $f < $self->{NR_CANDIDATES}; $f++) {
175 300 100       1623 if (!$self->{ADDED}{$f}) {
176 100         534 print CANDIDATES $self->{CANDIDATES}[$x]->bit_test($f);
177             }
178             }
179 100         478 print CANDIDATES "\n";
180             }
181              
182 1         89 close CANDIDATES;
183 1         12 print "Closed $file\n";
184             }
185             }
186              
187              
188             sub clear {
189 4     4 1 59 my($self) = @_;
190              
191 4         19 undef $self->{ADDED};
192             }
193              
194             1;
195              
196             __END__