File Coverage

blib/lib/Statistics/ROC.pm

Criterion	Covered	Total	%
statement	247	297	83.1
branch	65	98	66.3
condition	21	75	28.0
subroutine	15	16	93.7
pod	7	11	63.6
total	355	497	71.4

line	stmt	bran	cond	sub	pod	time	code
1							#LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
2							#
3							# ROC.pm - A Perl module implementing receiver-operator-characteristic (ROC)
4							# curves with nonparametric confidence bounds
5							#
6							# Copyright (c) 1998 Hans A. Kestler. All rights reserved.
7							# This program is free software; you may redistribute it and/or
8							# modify it under the same terms as Perl itself.
9							#
10							# This code implements a method for constructing nonparametric confidence
11							# for ROC curves described in
12							# R.A. Hilgers, Distribution-Free Confidence Bounds for ROC Curves,
13							# Meth Inform Med 1991; 30:96-101
14							# Additionally some auxilliary functions were ported (and corrected) from
15							# Fortran (Applied Statistics, ACM).
16							#
17							# Written in Perl by Hans A. Kestler.
18							# Bugs, comments, suggestions to:
19							# Hans A. Kestler
20							#
21							#
22							#
23							#LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
24
25							package Statistics::ROC;
26							require 5;
27	1			1		22777	use Carp;
	1					3
	1					89
28	1			1		6	use strict;
	1					1
	1					37
29	1			1		5	use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
	1					7
	1					2177
30
31							require Exporter;
32
33							@ISA = ('Exporter');
34							@EXPORT = qw(
35							roc rank loggamma betain Betain xinbta Xinbta
36							);
37							$VERSION = '0.01';
38
39
40
41
42							# Algorithm 291, Logarithm of the gamma function.
43							# in Collected Algorithms of the ACM, Vol II, 1980
44							# M.C. Pike and I.D. Hill with remark by M.R. Hoare
45							# see also Pike, M.C. and Hill, I.D. (1966). Algorithm 291. Logarithm of the
46							# gamma function. Commun. Ass. Comput. Mach., 9,684.
47
48							sub loggamma($){
49							# This procedure evaluates the natural logarithm of gamma(x) for all
50							# x>0, accurate to 10 decimal places. Stirlings formula is used for the
51							# central polynomial part of the procedure
52
53	145			145	1	168	my $x= shift; # default arg is @_
54	145					157	my ($f, $z);
55
56
57	145	50				263	if($x==0){return 743.746924740801} # this is: loggamma(9.9999999999E-324)
	0					0
58
59	145	100				211	if($x < 7)
60							{
61	80					248	for($z=$x,$f=1;$z<7;$z++)
62							{
63	296					247	$x=$z; $f*=$z;
	296					1375
64							}
65	80					73	$x++;
66	80					123	$f= -log($f); # log returns the natural logarithm
67							}
68	65					84	else{ $f=0;}
69	145					165	$z=1/($x*$x);
70	145					803	return $f+($x-.5)*log($x)-$x+.918938533204673+
71							(((-.000595238095238$z+.000793650793651)$z-
72							.002777777777778)*$z+.083333333333333)/$x;
73							}
74
75
76
77							# Algorithm AS 63 with remark AS R19,
78							# Computes incomplete beta function ratio
79							# K.L. Majumder and G.P. Bhattacharjee (1973). The Incomplete Beta Integral,
80							# Appl. Statist.,22:409:411 and
81							# G.W. Cran, K.J. Martin and G.E. Thomas (1977).Remark AS R19 and
82							# Algorithm AS109, A Remark on Algorithms AS 63: The Incomplete Beta Integral
83							# AS 64: Inverse of the Incomplete Beta Function Ratio,
84							# Appl. Statist., 26:111-114.
85							#
86							# Remarks:
87							# Complete beta function: B(p,q)=gamma(p)*gamma(q)/gamma(p+q)
88							# log(B(p,q))=ln(gamma(p))+ln(gamma(q))-ln(gamma(p+q))
89							#
90							# Incomplete beta function ratio:
91							# I_x(p,q)=1/B(p,q) * \int_0^x t^{p-1}*(1-t)^{q-1} dt
92							#
93							# --> log(B(p,q)) has to be supplied to calculate I_x(p,q)
94							# log denotes the natural logarithm
95							# $beta = log(B(p,q))
96							# $x = x
97							# $p = p
98							# $q = q
99							# The subroutine returns I_x(p,q). If an error occurs a negative value
100							# {-1,-2} is returned.
101
102							sub betain($$$$){
103							# Computes incomplete beta function ratio for arguments
104							# $x between zero and one, $p and $q positive.
105							# Log of complete beta function, $beta, is assumed to be known.
106
107	158			158	1	265	my ($x, $p, $q, $beta) = @_;
108	158					168	my ($xx, $psq, $cx, $pp, $qq, $index, $betain,
109							$ns, $term, $ai, $rx, $temp);
110	158					252	my $ACU=1.0E-14; # accuracy
111
112							# tests for admissibility of arguments
113	158	50	33			628	if($p<=0 \|\| $q<=0){ return -1;}
	0					0
114	158	50	33			631	if($x<0 \|\| $x>1) { return -2;}
	0					0
115
116							# change tail if necessary and determine s
117	158					184	$psq=$p+$q; $cx=1-$x;
	158					154
118	158	100				307	if($p<$psq*$x){ $xx=$cx; $cx=$x; $pp=$q; $qq=$p; $index=1;}
	19					20
	19					19
	19					19
	19					15
	19					21
119	139					164	else{ $xx=$x; $pp=$p; $qq=$q; $index=0;}
	139					197
	139					124
	139					149
120	158					204	$term=1; $ai=1; $betain=1;
	158					172
	158					140
121	158					177	$ns=$qq+$cx*$psq;
122
123							# use Soper's reduction formulae
124	158					164	$rx=$xx/$cx;
125	158		66			153	do{
126	873	50				1518	if($ns>=0){$temp=$qq-$ai; if($ns==0){$rx=$xx;}}
	783	100				754
	783					1858
	0					0
127	90					86	else{ $temp=$psq; $psq++;}
	90					78
128	873					1115	$term = $temp$rx/($pp+$ai);
129	873					948	$betain+=$term;
130	873					961	$temp=abs($term); $ai++; $ns--;}
	873					883
	873					2448
131							until($temp<=$ACU && $temp<=$ACU*$betain);
132
133							# calculate result
134	158					879	$betain = exp($pplog($xx)+($qq-1)*log($cx)-$beta)/$pp;
135	158	100				2376	if($index){ return 1-$betain;}
	19					44
136	139					438	else{ return $betain;}
137							}
138
139							sub Betain($$$){
140							# Computes the incomplete beta function
141							# by calling loggamma() and betain()
142	1			1	1	2	my ($x, $p, $q) = @_;
143
144	1	50				8	if($x==1){return 1;}
	0	50				0
	0					0
145							elsif($x==0){return 0;}
146	1					4	else{ return betain($x, $p, $q,loggamma($p)+loggamma($q)-loggamma($p+$q));}
147							}
148
149							sub max($$){
150							# computes the maximum of two numbers
151	47			47	0	144	my ($a, $b) = @_;
152
153	47	50				281	if($a>$b){ return $a;}
	47					110
154	0					0	else{ return $b;}
155							}
156
157
158							# Algorithm AS 109,
159							# Computes inverse of incomplete beta function ratio
160							# G.W. Cran, K.J. Martin and G.E. Thomas (1977).Remark AS R19 and
161							# Algorithm AS109, A Remark on Algorithms AS 63: The Incomplete Beta Integral
162							# AS 64: Inverse of the Incomplete Beta Function Ratio,
163							# Appl. Statist., 26:111-114.
164							#
165							# Remark AS R83 and the correction in vol40(1) of Appl. Statist.(1991), p.236
166							# have been incorporated in this version.
167							# K.J. Berry, P.W. Mielke, Jr and G.W. Cran (1990) Algorithm AS R83, A Remark
168							# on Algorithm AS 109: Inverse of the Incomplete Beta Function Ratio,
169							# Appl. Statist., 39:309-310.
170							#
171							# Remarks:
172							#
173							# Complete beta function: B(p,q)=gamma(p)*gamma(q)/gamma(p+q)
174							# log(B(p,q))=ln(gamma(p))+ln(gamma(q))-ln(gamma(p+q))
175							#
176							# Incomplete beta function ratio:
177							# alpha = I_x(p,q) = 1/B(p,q) * \int_0^x t^{p-1}*(1-t)^{q-1} dt
178							#
179							# --> log(B(p,q)) has to be supplied to calculate I_x(p,q)
180							# log denotes the natural logarithm
181							# $beta = log(B(p,q))
182							# $alpha= I_x(p,q)
183							# $p = p
184							# $q = q
185							# The subroutine returns x. If an error occurs a negative value {-1,-2,-3}
186							# is returned.
187
188							sub xinbta($$$$){
189							# Computes inverse of incomplete beta function ratio
190							# for given positive values of the arguments $p and $q,
191							# $alpha between zero and one.
192							# Log of complete beta function, $beta is assumed to be known.
193							#
194							# copyright by H.A. Kestler, 1998
195
196	47			47	1	95	my ($p, $q, $beta, $alpha) = @_;
197	47					152	my ($a, $y, $pp, $qq, $index, $r, $h, $t, $w, $xinbta,
198							$yprev, $prev,$s, $sq, $tx, $adj, $g);
199	47					49	my $SAE=-37;
200	47					101	my $FPU=10**$SAE;
201	47					48	my $ACU;
202
203							# test for admissibility of parameters
204	47	50	33			211	if($p<=0 \|\| $q<=0){ return -1;}
	0					0
205	47	50	33			230	if($alpha<0 \|\| $alpha>1) { return -2;}
	0					0
206	47	50	33			180	if($alpha==0 \|\| $alpha==1){ return $alpha;}
	0					0
207
208							# change tail if necessary
209	47	100				82	if($alpha>.5){ $a=1-$alpha; $pp=$q; $qq=$p; $index=1;}
	17					18
	17					18
	17					20
	17					24
210	30					32	else{ $a=$alpha; $pp=$p; $qq=$q; $index=0;}
	30					28
	30					28
	30					48
211
212							# calculate the initial approximation
213	47					202	$r=sqrt(-log($a*$a));
214	47					95	$y=$r-(2.30753+.27061$r)/(1+(.99229+.04481$r)*$r);
215	47	100	100			166	if($pp>1 && $qq > 1)
216							{
217	31					43	$r=($y*$y-3)/6; $s=1/($pp+$pp-1); $t=1/($qq+$qq-1);
	31					37
	31					37
218	31					31	$h=2/($s+$t);
219	31					69	$w=$ysqrt($h+$r)/$h-($t-$s)($r+5/6-2/(3*$h));
220	31					1760	$xinbta=$pp/($pp+$qq*exp($w+$w));
221							}
222							else
223							{
224	16					21	$r=$qq+$qq; $t=1/(9*$qq);
	16					25
225	16					66	$t=$r(1-$t+$ysqrt($t))**3;
226	16	50				35	if($t<=0){
227	0					0	$xinbta=1-exp((log((1-$a)*$qq)+$beta)/$qq);
228							}
229							else{
230	16					27	$t=(4*$pp+$r-2)/$t;
231	16	100				86	if($t<=1){ $xinbta=exp((log($a*$pp)+$beta)/$pp);}
	4					13
232	12					28	else{ $xinbta=1-2/($t+1);}
233							}
234							}
235
236							# solve for $x by a modified newton-raphson method
237							# using subroutine betain()
238	47					61	$r=1-$pp; $t=1-$qq; $yprev=0; $sq=1; $prev=1;
	47					47
	47					53
	47					44
	47					41
239	47	50				88	if($xinbta<.0001){ $xinbta=.0001;}
	0					0
240	47	50				70	if($xinbta>.9999){ $xinbta=.9999;}
	0					0
241	47					262	$ACU=10(max(-5/$pp2-1/$a**.2-13,$SAE));
242	47					58	do{
243	157					426	$y=betain($xinbta,$pp,$qq,$beta);
244	157	50	33			726	if($y==-1 \|\| $y==-2){ return -3;} # betain returns an exception
	0					0
245	157					418	$y=($y-$a)exp($beta+$rlog($xinbta)+$t*log(1-$xinbta));
246	157	50				288	if($y*$y<=0){ $prev=max($sq,$FPU);}
	0					0
247	157					301	$g=1;
248	157		33			162	Label10: do{
249	157					219	do{ $adj=$g$y; $sq=$adj$adj; $g/=3;} while($sq>=$prev);
	157					161
	157					143
	157					349
250	157					643	$tx=$xinbta-$adj;}
251							until($tx>=0 && $tx<=1);
252	157	100	66			605	if($prev<=$ACU \|\| $y*$y<=$ACU){ goto Label12;}
	47					696
253	110	50	33			388	if($tx==0 \|\| $tx==1){ goto Label10;}
	0					0
254	110					114	$xinbta=$tx; $yprev=$y;}
	110					216
255							until($adj==0);
256
257							Label12:
258	47	100				81	if($index){ return 1-$xinbta;}
	17					271
259	30					121	else{ return $xinbta;}
260							}
261
262
263							sub Xinbta($$$){
264							# Computes the inverse of the incomplete beta function
265							# by calling loggamma() and xinbta()
266							#
267							# copyright by H.A. Kestler, 1998
268
269	47			47	1	74	my ($p, $q, $alpha) = @_;
270
271	47	50				110	if($alpha==1){return 1;}
	0	50				0
	0					0
272							elsif($alpha==0){return 0;}
273	47					80	else{ return xinbta($p, $q,loggamma($p)+loggamma($q)-loggamma($p+$q),
274							$alpha);}
275							}
276
277
278
279							sub rank($\@){
280							# Computes the ranks of the values specified as the second
281							# argument (an array). Returns a vector of ranks
282							# corresponding to the input vector.
283							# Different types of ranking are possible ('high', 'low', 'mean'),
284							# and are specified as first argument.
285							# These differ in the way ties of the input vector, i.e. identical
286							# values, are treated:
287							# 'high' --> replace ranks of identical values with their
288							# highest rank
289							# 'low' --> replace ranks of identical values with their
290							# lowest rank
291							# 'mean' --> replace ranks of identical values with the mean
292							# of their rank
293							#
294							# copyright by H.A. Kestler, 1998
295
296	9			9	1	16	my ($type, $r) = @_; # $type: type of ranking 'high', 'low' or 'mean'
297							# $r: reference to array of values to be ranked
298	9					9	my (@s, $s, $i, @e, @rk, $rk_m);
299
300							# calculate initial rank's
301	9					15	@s=sort{$$r[$a]<=>$$r[$b]} 0..$#{$r}; # sort idx num. by values of @r
	120					153
	9					33
302	9					32	for($i=0,@rk=@s;$i<@rk;$i++){ $rk[$s[$i]]=$i+1;} # set rank's
	69					138
303
304							# treat ties
305	9					28	for($i=1,@e=(); $i<@s; $i++){
306	60	100				162	if($$r[$s[$i]]==$$r[$s[$i-1]]){ # test if there are ties
		100
307	21					48	push @e,$i-1;} # save index numbers of tied values (minus 1)
308							elsif(@e){ # ties have occured and are now being treated
309	18	100				34	if($type eq'mean'){ # calculate mean value of tied ranks
310	6					7	$rk_m=0;
311	6					12	for(@e,$e[-1]+1){ $rk_m+=$rk[$s[$_]];} $rk_m/=@e+1;
	13					22
	6					11
312							}
313	18					29	for(@e,$e[-1]+1){
314	39	100				84	if($type eq 'high'){ $rk[$s[$_]]=$rk[$s[$e[-1]+1]];}
	13	100				25
		50
315	13					27	elsif($type eq 'low' ){ $rk[$s[$_]]=$rk[$s[$e[0]]];}
316	13					24	elsif($type eq 'mean'){ $rk[$s[$_]]=$rk_m;}
317	0					0	else{ croak "Wrong type of ranking (high\|low\|mean).\n";}
318							}
319	18					51	@e=(); # reinitialize @e
320							}
321							}
322	9					71	return @rk;
323							}
324
325
326							sub locate(\@$){
327							# Routine to find the index for table lookup which is below
328							# the value to be interpolated.
329							# Given a reference to an array $xx and a value $x a value $j
330							# is returned such that $x is between $xx[$j] and $xx[$j+1].
331							# $xx must be monotonic, either increasing or decreasing.
332							#
333							# This routine is adapted from "Numerical Recipes in C",
334							# second edition, by Press, Teukolsky, Vetterling and Flannery,
335							# Cambridge University Press, 1992.
336							# It uses bisection to find the right place, which has a
337							# comutational complexity of O(log_2(n)).
338							#
339							# copyright by H.A. Kestler, 1998
340
341	18			18	0	24	my ($xx,$x)=@_;
342	18					21	my ($jl,$ju)=(0,$#{$xx}); # initialize lower and upper limits
	18					39
343	18					21	my ($jm,$ascend);
344
345	18					31	$ascend=$$xx[$ju] > $$xx[0];
346
347							# test if $x is inside of the array
348	18	50	33			177	if(($x>$$xx[$ju] \|\| $x<$$xx[$jl]) && $ascend)
			33
349	0					0	{ croak "Value out of range for table lookup (1): $x.\n";}
350	18	50	33			95	if(($x<$$xx[$ju] \|\| $x>$$xx[$jl]) && !$ascend)
			33
351	0					0	{ croak "Value out of range for table lookup (2): $x.\n";}
352
353	18					39	while(($ju-$jl)>1) { # If we are not yet done
354	57					68	$jm=int(($ju+$jl)/2); # compute a midpoint,
355	57	100				104	if($x > $xx->[$jm] == $ascend)
356	30					60	{ $jl=$jm;} # and replace either the lower limit
357							else
358	27					58	{ $ju=$jm;} # or the upper limit, as appropriate.
359							}
360	18					34	return $jl;
361							}
362
363
364							sub linlocate(\@$$){
365							# Routine to find the index for table lookup which is below
366							# the value to be interpolated.
367							# Given a reference to an array $xx and a value $x a value $j
368							# is returned such that $x is between $xx[$j] and $xx[$j+1].
369							# $xx must be monotonic, either increasing or decreasing.
370							#
371							# Starts searching linearly from an initial index value
372							# provided as the third argument.
373							# If no index value can be found a negative value is
374							# returned, i.e. -1.
375							#
376							# copyright by H.A. Kestler, 1998
377
378	0			0	0	0	my ($xx,$x,$index)=@_;
379	0					0	my ($jl,$ju)=(0,$#{$xx}); # initialize lower and upper limits
	0					0
380	0					0	my $ascend;
381
382	0					0	$ascend=$$xx[$ju] > $$xx[0];
383
384							# test if $x is inside of the array
385	0	0	0			0	if(($x>$$xx[$ju] \|\| $x<$$xx[$jl]) && $ascend)
			0
386	0					0	{ croak "Value out of range for table lookup.\n";}
387	0	0	0			0	if(($x<$$xx[$ju] \|\| $x>$$xx[$jl]) && !$ascend)
			0
388	0					0	{ croak "Value out of range for table lookup.\n";}
389
390							# step through the table sequentially
391	0	0	0			0	if($ascend && $xx->[$index]<$x){ # ascending
		0	0
392	0		0			0	while($x>$xx->[$index] and $index<=$ju) { $index++;}}
	0					0
393							elsif(!$ascend && $xx->[$index]>$x){ # descending
394	0		0			0	while($x<$xx->[$index] and $index<=$ju) { $index++;}}
	0					0
395	0					0	else{ return -1;} # starting index is too high
396
397	0					0	return $index-1;
398							}
399
400
401							sub interp(\@\@\@){
402							# Interpolates (table lookup) piecewise linearly an
403							# array (third argument). Returns
404							# The table is represented by the first two arguments, i.e. @xx and @yy.
405							# Assumes the @xx values to be monotonically increasing.
406							#
407							# copyright by H.A. Kestler, 1998
408
409	1			1		6	use vars ('@xx', '@yy', '@x');
	1					2
	1					320
410	6			6	0	21	local (xx, yy, *x)=@_;
411	6					8	my ($i, $index, @y);
412
413							# make checks
414	6	50				15	if(@xx != @yy) {croak "Sizes of xx and yy arrays are not equal.\n";}
	0					0
415
416	6					15	for($i=0; $i<@x; $i++)
417							{
418	18					41	$index=locate(@xx,$x[$i]);
419	18					86	$y[$i]=($yy[$index+1]-$yy[$index])/($xx[$index+1]-$xx[$index])*
420							($x[$i]-$xx[$index]) + $yy[$index];
421							}
422	6					23	return @y;
423							}
424
425
426							sub roc($$\@){
427							# ROC (receiver operator characteristic) curves with confidence bounds
428							#
429							# Determines the ROC curve and its nonparametric confidence bounds.
430							# The ROC curve shows the relationship of "probability of false
431							# alarm" (x-axis) to "probability of detection" (y-axis) for a
432							# certain test.
433							# Or in medical terms: the "probability of a positive test, given no
434							# disease" to the "probability of a positive test, given disease".
435							# The ROC curve may be used to determine an "optimal" cutoff
436							# point for the test.
437							#
438							# The routine takes three arguments:
439							# (1) type of model: 'decrease' or 'increase', this states the assumption
440							# that a higher ('increase') value of the data tends to be an
441							# indicator of a positive test result or for the model 'decrease'
442							# a lower value.
443							# (2) two-sided confidence interval (usually 0.95 is chosen).
444							# (3) the data stored as a list-of-lists:
445							# each entry in this list consits of an "value / true group" pair,
446							# i.e. value / disease present. Group values are from {0,1}.
447							# 0 stands for disease (or signal) not present (prior knowledge) and
448							# 1 for disease (or signal) present (prior knowledge).
449							# Example: @s=([2, 0], [12.5, 1], [3, 0], [10, 1], [9.5, 0], [9, 1]);
450							# Notice the small overlap of the groups. The
451							# optimal cutoff point to separate the two groups would be between
452							# 9 and 9.5 if the criterion of optimality is to maximize the
453							# probability of detection and simultaneously minimize the
454							# probability of false alarm.
455							#
456							# Returns a list-of-lists with the three curves:
457							# @ROC=([@lower_b], [@roc], [@upper_b]) each of the curves is
458							# again a list-of-lists with each entry consisting of one (x,y) pair.
459							# The routine impelements the method described in:
460							# R.A. Hilgers, Distribution-Free Confidence Bounds for ROC Curves,
461							# Meth Inform Med 1991; 30:96-101
462							#
463							# copyright by H.A. Kestler, 1998
464
465	1			1	1	2	my $model_type = shift; # assign
466	1					2	my $conf = shift;
467	1			1		6	use vars '@val_grp';
	1					2
	1					1244
468	1					4	local (*val_grp)=@_;
469
470	1					2	my ($cu, $cl, $elem, $n1, $n0, $i, $j);
471	1					2	my @grp1=();my @grp0=();
	1					2
472	1					3	my (@f_l_1,@f_m_1,@f_h_1,@f_l_0,@f_m_0,@f_h_0,@mat,@xx,@yy,@x,@y,@index);
473	0					0	my (@lower_b ,@roc ,@upper_b, @ROC);
474
475							# make checks
476	1	50	33			35	if($conf>=1 \|\| $conf<=0){ croak
	0					0
477							"The nominal 2-sided confidence limit must be a number of [0,1].\n";}
478	1	50	33			8	if($model_type ne 'increase' && $model_type ne 'decrease'){ croak
	0					0
479							"Wrong model type specified!\n";}
480
481	1					4	$cu=(sqrt($conf)+1)/2; # calculate the one-sided upper
482	1					3	$cl=1-$cu; # and lower confidence limits
483
484							# extract values
485	1					5	for($i=0;$i<@val_grp;$i++){
486	14	100				26	if($val_grp[$i][1]==1) { push @grp1, $val_grp[$i][0];}
	7					18
487	7					18	else { push @grp0, $val_grp[$i][0];}
488							}
489
490							# compute ranks and values of inverse incomplete beta function
491	1					4	@f_l_1=rank('low' ,@grp1);
492	1					3	@f_m_1=rank('mean',@grp1);
493	1					4	@f_h_1=rank('high',@grp1);
494	1					4	@f_l_0=rank('low' ,@grp0);
495	1					4	@f_m_0=rank('mean',@grp0);
496	1					4	@f_h_0=rank('high',@grp0);
497
498
499	1					3	$n1=@grp1; $n0=@grp0; # number of elements in both arrays
	1					10
500	1					2	for $elem (@f_l_1){ $elem=Xinbta($elem,$n1+1-$elem,$cl);}
	7					16
501	1					3	for $elem (@f_m_1){ $elem=Xinbta($elem,$n1+1-$elem,0.5);}
	7					14
502	1					5	for $elem (@f_h_1){ $elem=Xinbta($elem,$n1+1-$elem,$cu);}
	7					99
503	1					7	for $elem (@f_l_0){ $elem=Xinbta($elem,$n0+1-$elem,$cl);}
	7					17
504	1					5	for $elem (@f_m_0){ $elem=Xinbta($elem,$n0+1-$elem,0.5);}
	7					22
505	1					5	for $elem (@f_h_0){ $elem=Xinbta($elem,$n0+1-$elem,$cu);}
	7					22
506
507
508							# merge and sort
509	1					4	@mat=();
510	1					6	for($i=0;$i<$n1;$i++){ push @mat, [($grp1[$i], -1, -1, -1,
	7					47
511							$f_l_1[$i], $f_m_1[$i], $f_h_1[$i])];}
512	1					5	for($i=0;$i<$n0;$i++){ push @mat, [($grp0[$i],$f_l_0[$i], $f_m_0[$i],
	7					27
513							$f_h_0[$i], -1, -1, -1)];}
514							# sort numerically according to value in first column
515	1					12	@mat=@mat[sort{$mat[$a][0] <=> $mat[$b][0]} 0..$#mat];
	37					122
516
517
518							# for practical purposes augment @mat and fill missing data (-1)
519							# at the beginning and end of the matrix
520	1					5	unshift @mat,[-1, 0, 0, Xinbta(1,$n0,$cu), 0, 0, Xinbta(1,$n1,$cu)];
521	1					7	push @mat,[-1, Xinbta($n0,1,$cl), 1, 1, Xinbta($n1,1,$cl), 1, 1];
522	1					9	for($i=1;$mat[$i][1]==-1; $i++){
523	3					7	$mat[$i][1]=0; $mat[$i][2]=0; $mat[$i][3]=$mat[0][3];}
	3					4
	3					11
524	1					5	for($i=1;$mat[$i][4]==-1; $i++){
525	0					0	$mat[$i][4]=0; $mat[$i][5]=0; $mat[$i][6]=$mat[0][6];}
	0					0
	0					0
526	1					7	for($i=$#mat-1;$mat[$i][1]==-1; $i--){
527	0					0	$mat[$i][1]=$mat[$#mat][1]; $mat[$i][2]=1; $mat[$i][3]=1;}
	0					0
	0					0
528	1					6	for($i=$#mat-1;$mat[$i][4]==-1; $i--){
529	5					10	$mat[$i][4]=$mat[$#mat][4]; $mat[$i][5]=1; $mat[$i][6]=1;}
	5					7
	5					15
530
531
532							# replace missing data (-1) with a piecewise linear interpolation
533	1					7	for($j=1;$j<7;$j++) # iterate thru columns
534							{
535	6					31	for($i=1,@xx=(),@yy=(),@x=(),@index=();$i<$#mat;$i++){
536	84	100				196	push @xx, $mat[$i][0] if $mat[$i][$j] !=-1;
537	84	100				306	push @yy, $mat[$i][$j] if $mat[$i][$j] !=-1;
538	84	100				179	push @x, $mat[$i][0] if $mat[$i][$j] ==-1;
539	84	100				256	push @index, $i if $mat[$i][$j] ==-1;
540							}
541	6					19	@y=interp(@xx,@yy,@x);
542	6					19	for($i=0;$i<@index;$i++){ $mat[$index[$i]][$j]=$y[$i];}
	18					66
543							}
544
545
546							# calculate (x,y) pairs of ROC curve and its limit curves
547							# (lower, ROC, upper) according to specified model
548	1					9	for($i=0,@lower_b=(),@roc=(),@upper_b=(),;$i<@mat;$i++){
549	16	50				23	if($model_type eq 'decrease'){
550	16					39	push @lower_b, [($mat[$i][3], $mat[$i][4])];
551	16					36	push @roc, [($mat[$i][2], $mat[$i][5])];
552	16					85	push @upper_b, [($mat[$i][1], $mat[$i][6])];
553							}
554							else{
555	0					0	push @lower_b, [(1-$mat[$i][3], 1-$mat[$i][4])];
556	0					0	push @roc, [(1-$mat[$i][2], 1-$mat[$i][5])];
557	0					0	push @upper_b, [(1-$mat[$i][1], 1-$mat[$i][6])];
558							}
559							}
560
561	1					13	@ROC=([@lower_b], [@roc], [@upper_b]);
562	1					25	return @ROC;
563							}
564
565
566
567
568							# Autoload methods go after =cut, and are processed by the autosplit program.
569
570							1;
571							__END__