File Coverage

blib/lib/Statistics/ROC.pm
Criterion Covered Total %
statement 247 297 83.1
branch 65 98 66.3
condition 21 75 28.0
subroutine 15 16 93.7
pod 7 11 63.6
total 355 497 71.4


line stmt bran cond sub pod time code
1             #LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
2             #
3             # ROC.pm - A Perl module implementing receiver-operator-characteristic (ROC)
4             # curves with nonparametric confidence bounds
5             #
6             # Copyright (c) 1998 Hans A. Kestler. All rights reserved.
7             # This program is free software; you may redistribute it and/or
8             # modify it under the same terms as Perl itself.
9             #
10             # This code implements a method for constructing nonparametric confidence
11             # for ROC curves described in
12             # R.A. Hilgers, Distribution-Free Confidence Bounds for ROC Curves,
13             # Meth Inform Med 1991; 30:96-101
14             # Additionally some auxilliary functions were ported (and corrected) from
15             # Fortran (Applied Statistics, ACM).
16             #
17             # Written in Perl by Hans A. Kestler.
18             # Bugs, comments, suggestions to:
19             # Hans A. Kestler
20             #
21             #
22             #
23             #LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
24              
25             package Statistics::ROC;
26             require 5;
27 1     1   22777 use Carp;
  1         3  
  1         89  
28 1     1   6 use strict;
  1         1  
  1         37  
29 1     1   5 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
  1         7  
  1         2177  
30              
31             require Exporter;
32              
33             @ISA = ('Exporter');
34             @EXPORT = qw(
35             roc rank loggamma betain Betain xinbta Xinbta
36             );
37             $VERSION = '0.01';
38              
39              
40              
41              
42             # Algorithm 291, Logarithm of the gamma function.
43             # in Collected Algorithms of the ACM, Vol II, 1980
44             # M.C. Pike and I.D. Hill with remark by M.R. Hoare
45             # see also Pike, M.C. and Hill, I.D. (1966). Algorithm 291. Logarithm of the
46             # gamma function. Commun. Ass. Comput. Mach., 9,684.
47              
48             sub loggamma($){
49             # This procedure evaluates the natural logarithm of gamma(x) for all
50             # x>0, accurate to 10 decimal places. Stirlings formula is used for the
51             # central polynomial part of the procedure
52            
53 145     145 1 168 my $x= shift; # default arg is @_
54 145         157 my ($f, $z);
55            
56            
57 145 50       263 if($x==0){return 743.746924740801} # this is: loggamma(9.9999999999E-324)
  0         0  
58            
59 145 100       211 if($x < 7)
60             {
61 80         248 for($z=$x,$f=1;$z<7;$z++)
62             {
63 296         247 $x=$z; $f*=$z;
  296         1375  
64             }
65 80         73 $x++;
66 80         123 $f= -log($f); # log returns the natural logarithm
67             }
68 65         84 else{ $f=0;}
69 145         165 $z=1/($x*$x);
70 145         803 return $f+($x-.5)*log($x)-$x+.918938533204673+
71             (((-.000595238095238*$z+.000793650793651)*$z-
72             .002777777777778)*$z+.083333333333333)/$x;
73             }
74              
75              
76              
77             # Algorithm AS 63 with remark AS R19,
78             # Computes incomplete beta function ratio
79             # K.L. Majumder and G.P. Bhattacharjee (1973). The Incomplete Beta Integral,
80             # Appl. Statist.,22:409:411 and
81             # G.W. Cran, K.J. Martin and G.E. Thomas (1977).Remark AS R19 and
82             # Algorithm AS109, A Remark on Algorithms AS 63: The Incomplete Beta Integral
83             # AS 64: Inverse of the Incomplete Beta Function Ratio,
84             # Appl. Statist., 26:111-114.
85             #
86             # Remarks:
87             # Complete beta function: B(p,q)=gamma(p)*gamma(q)/gamma(p+q)
88             # log(B(p,q))=ln(gamma(p))+ln(gamma(q))-ln(gamma(p+q))
89             #
90             # Incomplete beta function ratio:
91             # I_x(p,q)=1/B(p,q) * \int_0^x t^{p-1}*(1-t)^{q-1} dt
92             #
93             # --> log(B(p,q)) has to be supplied to calculate I_x(p,q)
94             # log denotes the natural logarithm
95             # $beta = log(B(p,q))
96             # $x = x
97             # $p = p
98             # $q = q
99             # The subroutine returns I_x(p,q). If an error occurs a negative value
100             # {-1,-2} is returned.
101            
102             sub betain($$$$){
103             # Computes incomplete beta function ratio for arguments
104             # $x between zero and one, $p and $q positive.
105             # Log of complete beta function, $beta, is assumed to be known.
106            
107 158     158 1 265 my ($x, $p, $q, $beta) = @_;
108 158         168 my ($xx, $psq, $cx, $pp, $qq, $index, $betain,
109             $ns, $term, $ai, $rx, $temp);
110 158         252 my $ACU=1.0E-14; # accuracy
111            
112             # tests for admissibility of arguments
113 158 50 33     628 if($p<=0 || $q<=0){ return -1;}
  0         0  
114 158 50 33     631 if($x<0 || $x>1) { return -2;}
  0         0  
115              
116             # change tail if necessary and determine s
117 158         184 $psq=$p+$q; $cx=1-$x;
  158         154  
118 158 100       307 if($p<$psq*$x){ $xx=$cx; $cx=$x; $pp=$q; $qq=$p; $index=1;}
  19         20  
  19         19  
  19         19  
  19         15  
  19         21  
119 139         164 else{ $xx=$x; $pp=$p; $qq=$q; $index=0;}
  139         197  
  139         124  
  139         149  
120 158         204 $term=1; $ai=1; $betain=1;
  158         172  
  158         140  
121 158         177 $ns=$qq+$cx*$psq;
122            
123             # use Soper's reduction formulae
124 158         164 $rx=$xx/$cx;
125 158   66     153 do{
126 873 50       1518 if($ns>=0){$temp=$qq-$ai; if($ns==0){$rx=$xx;}}
  783 100       754  
  783         1858  
  0         0  
127 90         86 else{ $temp=$psq; $psq++;}
  90         78  
128 873         1115 $term *= $temp*$rx/($pp+$ai);
129 873         948 $betain+=$term;
130 873         961 $temp=abs($term); $ai++; $ns--;}
  873         883  
  873         2448  
131             until($temp<=$ACU && $temp<=$ACU*$betain);
132            
133             # calculate result
134 158         879 $betain *= exp($pp*log($xx)+($qq-1)*log($cx)-$beta)/$pp;
135 158 100       2376 if($index){ return 1-$betain;}
  19         44  
136 139         438 else{ return $betain;}
137             }
138            
139             sub Betain($$$){
140             # Computes the incomplete beta function
141             # by calling loggamma() and betain()
142 1     1 1 2 my ($x, $p, $q) = @_;
143            
144 1 50       8 if($x==1){return 1;}
  0 50       0  
  0         0  
145             elsif($x==0){return 0;}
146 1         4 else{ return betain($x, $p, $q,loggamma($p)+loggamma($q)-loggamma($p+$q));}
147             }
148              
149             sub max($$){
150             # computes the maximum of two numbers
151 47     47 0 144 my ($a, $b) = @_;
152            
153 47 50       281 if($a>$b){ return $a;}
  47         110  
154 0         0 else{ return $b;}
155             }
156              
157              
158             # Algorithm AS 109,
159             # Computes inverse of incomplete beta function ratio
160             # G.W. Cran, K.J. Martin and G.E. Thomas (1977).Remark AS R19 and
161             # Algorithm AS109, A Remark on Algorithms AS 63: The Incomplete Beta Integral
162             # AS 64: Inverse of the Incomplete Beta Function Ratio,
163             # Appl. Statist., 26:111-114.
164             #
165             # Remark AS R83 and the correction in vol40(1) of Appl. Statist.(1991), p.236
166             # have been incorporated in this version.
167             # K.J. Berry, P.W. Mielke, Jr and G.W. Cran (1990) Algorithm AS R83, A Remark
168             # on Algorithm AS 109: Inverse of the Incomplete Beta Function Ratio,
169             # Appl. Statist., 39:309-310.
170             #
171             # Remarks:
172             #
173             # Complete beta function: B(p,q)=gamma(p)*gamma(q)/gamma(p+q)
174             # log(B(p,q))=ln(gamma(p))+ln(gamma(q))-ln(gamma(p+q))
175             #
176             # Incomplete beta function ratio:
177             # alpha = I_x(p,q) = 1/B(p,q) * \int_0^x t^{p-1}*(1-t)^{q-1} dt
178             #
179             # --> log(B(p,q)) has to be supplied to calculate I_x(p,q)
180             # log denotes the natural logarithm
181             # $beta = log(B(p,q))
182             # $alpha= I_x(p,q)
183             # $p = p
184             # $q = q
185             # The subroutine returns x. If an error occurs a negative value {-1,-2,-3}
186             # is returned.
187            
188             sub xinbta($$$$){
189             # Computes inverse of incomplete beta function ratio
190             # for given positive values of the arguments $p and $q,
191             # $alpha between zero and one.
192             # Log of complete beta function, $beta is assumed to be known.
193             #
194             # copyright by H.A. Kestler, 1998
195              
196 47     47 1 95 my ($p, $q, $beta, $alpha) = @_;
197 47         152 my ($a, $y, $pp, $qq, $index, $r, $h, $t, $w, $xinbta,
198             $yprev, $prev,$s, $sq, $tx, $adj, $g);
199 47         49 my $SAE=-37;
200 47         101 my $FPU=10**$SAE;
201 47         48 my $ACU;
202            
203             # test for admissibility of parameters
204 47 50 33     211 if($p<=0 || $q<=0){ return -1;}
  0         0  
205 47 50 33     230 if($alpha<0 || $alpha>1) { return -2;}
  0         0  
206 47 50 33     180 if($alpha==0 || $alpha==1){ return $alpha;}
  0         0  
207            
208             # change tail if necessary
209 47 100       82 if($alpha>.5){ $a=1-$alpha; $pp=$q; $qq=$p; $index=1;}
  17         18  
  17         18  
  17         20  
  17         24  
210 30         32 else{ $a=$alpha; $pp=$p; $qq=$q; $index=0;}
  30         28  
  30         28  
  30         48  
211            
212             # calculate the initial approximation
213 47         202 $r=sqrt(-log($a*$a));
214 47         95 $y=$r-(2.30753+.27061*$r)/(1+(.99229+.04481*$r)*$r);
215 47 100 100     166 if($pp>1 && $qq > 1)
216             {
217 31         43 $r=($y*$y-3)/6; $s=1/($pp+$pp-1); $t=1/($qq+$qq-1);
  31         37  
  31         37  
218 31         31 $h=2/($s+$t);
219 31         69 $w=$y*sqrt($h+$r)/$h-($t-$s)*($r+5/6-2/(3*$h));
220 31         1760 $xinbta=$pp/($pp+$qq*exp($w+$w));
221             }
222             else
223             {
224 16         21 $r=$qq+$qq; $t=1/(9*$qq);
  16         25  
225 16         66 $t=$r*(1-$t+$y*sqrt($t))**3;
226 16 50       35 if($t<=0){
227 0         0 $xinbta=1-exp((log((1-$a)*$qq)+$beta)/$qq);
228             }
229             else{
230 16         27 $t=(4*$pp+$r-2)/$t;
231 16 100       86 if($t<=1){ $xinbta=exp((log($a*$pp)+$beta)/$pp);}
  4         13  
232 12         28 else{ $xinbta=1-2/($t+1);}
233             }
234             }
235            
236             # solve for $x by a modified newton-raphson method
237             # using subroutine betain()
238 47         61 $r=1-$pp; $t=1-$qq; $yprev=0; $sq=1; $prev=1;
  47         47  
  47         53  
  47         44  
  47         41  
239 47 50       88 if($xinbta<.0001){ $xinbta=.0001;}
  0         0  
240 47 50       70 if($xinbta>.9999){ $xinbta=.9999;}
  0         0  
241 47         262 $ACU=10**(max(-5/$pp**2-1/$a**.2-13,$SAE));
242 47         58 do{
243 157         426 $y=betain($xinbta,$pp,$qq,$beta);
244 157 50 33     726 if($y==-1 || $y==-2){ return -3;} # betain returns an exception
  0         0  
245 157         418 $y=($y-$a)*exp($beta+$r*log($xinbta)+$t*log(1-$xinbta));
246 157 50       288 if($y*$y<=0){ $prev=max($sq,$FPU);}
  0         0  
247 157         301 $g=1;
248 157   33     162 Label10: do{
249 157         219 do{ $adj=$g*$y; $sq=$adj*$adj; $g/=3;} while($sq>=$prev);
  157         161  
  157         143  
  157         349  
250 157         643 $tx=$xinbta-$adj;}
251             until($tx>=0 && $tx<=1);
252 157 100 66     605 if($prev<=$ACU || $y*$y<=$ACU){ goto Label12;}
  47         696  
253 110 50 33     388 if($tx==0 || $tx==1){ goto Label10;}
  0         0  
254 110         114 $xinbta=$tx; $yprev=$y;}
  110         216  
255             until($adj==0);
256            
257             Label12:
258 47 100       81 if($index){ return 1-$xinbta;}
  17         271  
259 30         121 else{ return $xinbta;}
260             }
261              
262              
263             sub Xinbta($$$){
264             # Computes the inverse of the incomplete beta function
265             # by calling loggamma() and xinbta()
266             #
267             # copyright by H.A. Kestler, 1998
268              
269 47     47 1 74 my ($p, $q, $alpha) = @_;
270            
271 47 50       110 if($alpha==1){return 1;}
  0 50       0  
  0         0  
272             elsif($alpha==0){return 0;}
273 47         80 else{ return xinbta($p, $q,loggamma($p)+loggamma($q)-loggamma($p+$q),
274             $alpha);}
275             }
276              
277              
278              
279             sub rank($\@){
280             # Computes the ranks of the values specified as the second
281             # argument (an array). Returns a vector of ranks
282             # corresponding to the input vector.
283             # Different types of ranking are possible ('high', 'low', 'mean'),
284             # and are specified as first argument.
285             # These differ in the way ties of the input vector, i.e. identical
286             # values, are treated:
287             # 'high' --> replace ranks of identical values with their
288             # highest rank
289             # 'low' --> replace ranks of identical values with their
290             # lowest rank
291             # 'mean' --> replace ranks of identical values with the mean
292             # of their rank
293             #
294             # copyright by H.A. Kestler, 1998
295            
296 9     9 1 16 my ($type, $r) = @_; # $type: type of ranking 'high', 'low' or 'mean'
297             # $r: reference to array of values to be ranked
298 9         9 my (@s, $s, $i, @e, @rk, $rk_m);
299            
300             # calculate initial rank's
301 9         15 @s=sort{$$r[$a]<=>$$r[$b]} 0..$#{$r}; # sort idx num. by values of @r
  120         153  
  9         33  
302 9         32 for($i=0,@rk=@s;$i<@rk;$i++){ $rk[$s[$i]]=$i+1;} # set rank's
  69         138  
303            
304             # treat ties
305 9         28 for($i=1,@e=(); $i<@s; $i++){
306 60 100       162 if($$r[$s[$i]]==$$r[$s[$i-1]]){ # test if there are ties
    100          
307 21         48 push @e,$i-1;} # save index numbers of tied values (minus 1)
308             elsif(@e){ # ties have occured and are now being treated
309 18 100       34 if($type eq'mean'){ # calculate mean value of tied ranks
310 6         7 $rk_m=0;
311 6         12 for(@e,$e[-1]+1){ $rk_m+=$rk[$s[$_]];} $rk_m/=@e+1;
  13         22  
  6         11  
312             }
313 18         29 for(@e,$e[-1]+1){
314 39 100       84 if($type eq 'high'){ $rk[$s[$_]]=$rk[$s[$e[-1]+1]];}
  13 100       25  
    50          
315 13         27 elsif($type eq 'low' ){ $rk[$s[$_]]=$rk[$s[$e[0]]];}
316 13         24 elsif($type eq 'mean'){ $rk[$s[$_]]=$rk_m;}
317 0         0 else{ croak "Wrong type of ranking (high|low|mean).\n";}
318             }
319 18         51 @e=(); # reinitialize @e
320             }
321             }
322 9         71 return @rk;
323             }
324              
325              
326             sub locate(\@$){
327             # Routine to find the index for table lookup which is below
328             # the value to be interpolated.
329             # Given a reference to an array $xx and a value $x a value $j
330             # is returned such that $x is between $xx[$j] and $xx[$j+1].
331             # $xx must be monotonic, either increasing or decreasing.
332             #
333             # This routine is adapted from "Numerical Recipes in C",
334             # second edition, by Press, Teukolsky, Vetterling and Flannery,
335             # Cambridge University Press, 1992.
336             # It uses bisection to find the right place, which has a
337             # comutational complexity of O(log_2(n)).
338             #
339             # copyright by H.A. Kestler, 1998
340              
341 18     18 0 24 my ($xx,$x)=@_;
342 18         21 my ($jl,$ju)=(0,$#{$xx}); # initialize lower and upper limits
  18         39  
343 18         21 my ($jm,$ascend);
344            
345 18         31 $ascend=$$xx[$ju] > $$xx[0];
346            
347             # test if $x is inside of the array
348 18 50 33     177 if(($x>$$xx[$ju] || $x<$$xx[$jl]) && $ascend)
      33        
349 0         0 { croak "Value out of range for table lookup (1): $x.\n";}
350 18 50 33     95 if(($x<$$xx[$ju] || $x>$$xx[$jl]) && !$ascend)
      33        
351 0         0 { croak "Value out of range for table lookup (2): $x.\n";}
352            
353 18         39 while(($ju-$jl)>1) { # If we are not yet done
354 57         68 $jm=int(($ju+$jl)/2); # compute a midpoint,
355 57 100       104 if($x > $xx->[$jm] == $ascend)
356 30         60 { $jl=$jm;} # and replace either the lower limit
357             else
358 27         58 { $ju=$jm;} # or the upper limit, as appropriate.
359             }
360 18         34 return $jl;
361             }
362              
363              
364             sub linlocate(\@$$){
365             # Routine to find the index for table lookup which is below
366             # the value to be interpolated.
367             # Given a reference to an array $xx and a value $x a value $j
368             # is returned such that $x is between $xx[$j] and $xx[$j+1].
369             # $xx must be monotonic, either increasing or decreasing.
370             #
371             # Starts searching linearly from an initial index value
372             # provided as the third argument.
373             # If no index value can be found a negative value is
374             # returned, i.e. -1.
375             #
376             # copyright by H.A. Kestler, 1998
377            
378 0     0 0 0 my ($xx,$x,$index)=@_;
379 0         0 my ($jl,$ju)=(0,$#{$xx}); # initialize lower and upper limits
  0         0  
380 0         0 my $ascend;
381            
382 0         0 $ascend=$$xx[$ju] > $$xx[0];
383            
384             # test if $x is inside of the array
385 0 0 0     0 if(($x>$$xx[$ju] || $x<$$xx[$jl]) && $ascend)
      0        
386 0         0 { croak "Value out of range for table lookup.\n";}
387 0 0 0     0 if(($x<$$xx[$ju] || $x>$$xx[$jl]) && !$ascend)
      0        
388 0         0 { croak "Value out of range for table lookup.\n";}
389            
390             # step through the table sequentially
391 0 0 0     0 if($ascend && $xx->[$index]<$x){ # ascending
    0 0        
392 0   0     0 while($x>$xx->[$index] and $index<=$ju) { $index++;}}
  0         0  
393             elsif(!$ascend && $xx->[$index]>$x){ # descending
394 0   0     0 while($x<$xx->[$index] and $index<=$ju) { $index++;}}
  0         0  
395 0         0 else{ return -1;} # starting index is too high
396            
397 0         0 return $index-1;
398             }
399              
400              
401             sub interp(\@\@\@){
402             # Interpolates (table lookup) piecewise linearly an
403             # array (third argument). Returns
404             # The table is represented by the first two arguments, i.e. @xx and @yy.
405             # Assumes the @xx values to be monotonically increasing.
406             #
407             # copyright by H.A. Kestler, 1998
408            
409 1     1   6 use vars ('@xx', '@yy', '@x');
  1         2  
  1         320  
410 6     6 0 21 local (*xx, *yy, *x)=@_;
411 6         8 my ($i, $index, @y);
412            
413             # make checks
414 6 50       15 if(@xx != @yy) {croak "Sizes of xx and yy arrays are not equal.\n";}
  0         0  
415            
416 6         15 for($i=0; $i<@x; $i++)
417             {
418 18         41 $index=locate(@xx,$x[$i]);
419 18         86 $y[$i]=($yy[$index+1]-$yy[$index])/($xx[$index+1]-$xx[$index])*
420             ($x[$i]-$xx[$index]) + $yy[$index];
421             }
422 6         23 return @y;
423             }
424              
425              
426             sub roc($$\@){
427             # ROC (receiver operator characteristic) curves with confidence bounds
428             #
429             # Determines the ROC curve and its nonparametric confidence bounds.
430             # The ROC curve shows the relationship of "probability of false
431             # alarm" (x-axis) to "probability of detection" (y-axis) for a
432             # certain test.
433             # Or in medical terms: the "probability of a positive test, given no
434             # disease" to the "probability of a positive test, given disease".
435             # The ROC curve may be used to determine an "optimal" cutoff
436             # point for the test.
437             #
438             # The routine takes three arguments:
439             # (1) type of model: 'decrease' or 'increase', this states the assumption
440             # that a higher ('increase') value of the data tends to be an
441             # indicator of a positive test result or for the model 'decrease'
442             # a lower value.
443             # (2) two-sided confidence interval (usually 0.95 is chosen).
444             # (3) the data stored as a list-of-lists:
445             # each entry in this list consits of an "value / true group" pair,
446             # i.e. value / disease present. Group values are from {0,1}.
447             # 0 stands for disease (or signal) not present (prior knowledge) and
448             # 1 for disease (or signal) present (prior knowledge).
449             # Example: @s=([2, 0], [12.5, 1], [3, 0], [10, 1], [9.5, 0], [9, 1]);
450             # Notice the small overlap of the groups. The
451             # optimal cutoff point to separate the two groups would be between
452             # 9 and 9.5 if the criterion of optimality is to maximize the
453             # probability of detection and simultaneously minimize the
454             # probability of false alarm.
455             #
456             # Returns a list-of-lists with the three curves:
457             # @ROC=([@lower_b], [@roc], [@upper_b]) each of the curves is
458             # again a list-of-lists with each entry consisting of one (x,y) pair.
459             # The routine impelements the method described in:
460             # R.A. Hilgers, Distribution-Free Confidence Bounds for ROC Curves,
461             # Meth Inform Med 1991; 30:96-101
462             #
463             # copyright by H.A. Kestler, 1998
464            
465 1     1 1 2 my $model_type = shift; # assign
466 1         2 my $conf = shift;
467 1     1   6 use vars '@val_grp';
  1         2  
  1         1244  
468 1         4 local (*val_grp)=@_;
469            
470 1         2 my ($cu, $cl, $elem, $n1, $n0, $i, $j);
471 1         2 my @grp1=();my @grp0=();
  1         2  
472 1         3 my (@f_l_1,@f_m_1,@f_h_1,@f_l_0,@f_m_0,@f_h_0,@mat,@xx,@yy,@x,@y,@index);
473 0         0 my (@lower_b ,@roc ,@upper_b, @ROC);
474            
475             # make checks
476 1 50 33     35 if($conf>=1 || $conf<=0){ croak
  0         0  
477             "The nominal 2-sided confidence limit must be a number of [0,1].\n";}
478 1 50 33     8 if($model_type ne 'increase' && $model_type ne 'decrease'){ croak
  0         0  
479             "Wrong model type specified!\n";}
480            
481 1         4 $cu=(sqrt($conf)+1)/2; # calculate the one-sided upper
482 1         3 $cl=1-$cu; # and lower confidence limits
483            
484             # extract values
485 1         5 for($i=0;$i<@val_grp;$i++){
486 14 100       26 if($val_grp[$i][1]==1) { push @grp1, $val_grp[$i][0];}
  7         18  
487 7         18 else { push @grp0, $val_grp[$i][0];}
488             }
489            
490             # compute ranks and values of inverse incomplete beta function
491 1         4 @f_l_1=rank('low' ,@grp1);
492 1         3 @f_m_1=rank('mean',@grp1);
493 1         4 @f_h_1=rank('high',@grp1);
494 1         4 @f_l_0=rank('low' ,@grp0);
495 1         4 @f_m_0=rank('mean',@grp0);
496 1         4 @f_h_0=rank('high',@grp0);
497            
498            
499 1         3 $n1=@grp1; $n0=@grp0; # number of elements in both arrays
  1         10  
500 1         2 for $elem (@f_l_1){ $elem=Xinbta($elem,$n1+1-$elem,$cl);}
  7         16  
501 1         3 for $elem (@f_m_1){ $elem=Xinbta($elem,$n1+1-$elem,0.5);}
  7         14  
502 1         5 for $elem (@f_h_1){ $elem=Xinbta($elem,$n1+1-$elem,$cu);}
  7         99  
503 1         7 for $elem (@f_l_0){ $elem=Xinbta($elem,$n0+1-$elem,$cl);}
  7         17  
504 1         5 for $elem (@f_m_0){ $elem=Xinbta($elem,$n0+1-$elem,0.5);}
  7         22  
505 1         5 for $elem (@f_h_0){ $elem=Xinbta($elem,$n0+1-$elem,$cu);}
  7         22  
506              
507            
508             # merge and sort
509 1         4 @mat=();
510 1         6 for($i=0;$i<$n1;$i++){ push @mat, [($grp1[$i], -1, -1, -1,
  7         47  
511             $f_l_1[$i], $f_m_1[$i], $f_h_1[$i])];}
512 1         5 for($i=0;$i<$n0;$i++){ push @mat, [($grp0[$i],$f_l_0[$i], $f_m_0[$i],
  7         27  
513             $f_h_0[$i], -1, -1, -1)];}
514             # sort numerically according to value in first column
515 1         12 @mat=@mat[sort{$mat[$a][0] <=> $mat[$b][0]} 0..$#mat];
  37         122  
516            
517              
518             # for practical purposes augment @mat and fill missing data (-1)
519             # at the beginning and end of the matrix
520 1         5 unshift @mat,[-1, 0, 0, Xinbta(1,$n0,$cu), 0, 0, Xinbta(1,$n1,$cu)];
521 1         7 push @mat,[-1, Xinbta($n0,1,$cl), 1, 1, Xinbta($n1,1,$cl), 1, 1];
522 1         9 for($i=1;$mat[$i][1]==-1; $i++){
523 3         7 $mat[$i][1]=0; $mat[$i][2]=0; $mat[$i][3]=$mat[0][3];}
  3         4  
  3         11  
524 1         5 for($i=1;$mat[$i][4]==-1; $i++){
525 0         0 $mat[$i][4]=0; $mat[$i][5]=0; $mat[$i][6]=$mat[0][6];}
  0         0  
  0         0  
526 1         7 for($i=$#mat-1;$mat[$i][1]==-1; $i--){
527 0         0 $mat[$i][1]=$mat[$#mat][1]; $mat[$i][2]=1; $mat[$i][3]=1;}
  0         0  
  0         0  
528 1         6 for($i=$#mat-1;$mat[$i][4]==-1; $i--){
529 5         10 $mat[$i][4]=$mat[$#mat][4]; $mat[$i][5]=1; $mat[$i][6]=1;}
  5         7  
  5         15  
530              
531            
532             # replace missing data (-1) with a piecewise linear interpolation
533 1         7 for($j=1;$j<7;$j++) # iterate thru columns
534             {
535 6         31 for($i=1,@xx=(),@yy=(),@x=(),@index=();$i<$#mat;$i++){
536 84 100       196 push @xx, $mat[$i][0] if $mat[$i][$j] !=-1;
537 84 100       306 push @yy, $mat[$i][$j] if $mat[$i][$j] !=-1;
538 84 100       179 push @x, $mat[$i][0] if $mat[$i][$j] ==-1;
539 84 100       256 push @index, $i if $mat[$i][$j] ==-1;
540             }
541 6         19 @y=interp(@xx,@yy,@x);
542 6         19 for($i=0;$i<@index;$i++){ $mat[$index[$i]][$j]=$y[$i];}
  18         66  
543             }
544            
545            
546             # calculate (x,y) pairs of ROC curve and its limit curves
547             # (lower, ROC, upper) according to specified model
548 1         9 for($i=0,@lower_b=(),@roc=(),@upper_b=(),;$i<@mat;$i++){
549 16 50       23 if($model_type eq 'decrease'){
550 16         39 push @lower_b, [($mat[$i][3], $mat[$i][4])];
551 16         36 push @roc, [($mat[$i][2], $mat[$i][5])];
552 16         85 push @upper_b, [($mat[$i][1], $mat[$i][6])];
553             }
554             else{
555 0         0 push @lower_b, [(1-$mat[$i][3], 1-$mat[$i][4])];
556 0         0 push @roc, [(1-$mat[$i][2], 1-$mat[$i][5])];
557 0         0 push @upper_b, [(1-$mat[$i][1], 1-$mat[$i][6])];
558             }
559             }
560            
561 1         13 @ROC=([@lower_b], [@roc], [@upper_b]);
562 1         25 return @ROC;
563             }
564              
565              
566              
567              
568             # Autoload methods go after =cut, and are processed by the autosplit program.
569              
570             1;
571             __END__