File Coverage

blib/lib/Bio/SDRS.pm
Criterion Covered Total %
statement 71 418 16.9
branch 7 138 5.0
condition 2 48 4.1
subroutine 15 34 44.1
pod n/a
total 95 638 14.8


line stmt bran cond sub pod time code
1             package Bio::SDRS;
2              
3 1     1   33524 use 5.008;
  1         3  
  1         150  
4 1     1   6 use strict;
  1         1  
  1         36  
5 1     1   6 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION $AUTOLOAD);
  1         6  
  1         91  
6 1     1   5 use warnings;
  1         1  
  1         36  
7 1     1   5 use Carp;
  1         1  
  1         86  
8 1     1   1159 use POSIX;
  1         10605  
  1         7  
9 1     1   5214 use Math::NumberCruncher;
  1         140472  
  1         90  
10 1     1   4529 use Statistics::Distributions;
  1         13292  
  1         2113  
11              
12             require Exporter;
13              
14             @ISA = qw(Exporter);
15              
16             # Items to export into callers namespace by default. Note: do not export
17             # names by default without a very good reason. Use EXPORT_OK instead.
18             # Do not simply export all your public functions/methods/constants.
19              
20             # This allows declaration
21             # use Bio::SDRS ':all';
22             # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
23             # will save memory.
24             our %EXPORT_TAGS = ( 'all' => [ qw() ] );
25              
26             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
27              
28             our @EXPORT = qw();
29              
30             our $AUTOLOAD;
31              
32             my %component = map { ($_ => 1) } ('MULTIPLE', 'LDOSE', 'HDOSE', 'STEP',
33             'MAXPROC', 'TRIM', 'SIGNIFICANCE',
34             'TMPDIR', 'DEBUG');
35              
36             our $VERSION = '0.08';
37              
38             1;
39            
40             =head1 NAME
41              
42             Bio::SDRS - Perl extension for Sigmoidal Dose Response Search, a tool
43             for characterizing biological responses to compounds.
44              
45             =head1 SYNOPSIS
46              
47             use Bio::SDRS;
48             my $sdrs = new Bio::SDRS;
49              
50             $sdrs->doses(0.423377, 1.270132, 3.810395, 11.431184, 34.293553,
51             102.880658, 308.641975, 925.925926, 2777.777778, 8333.333333,
52             25000);
53             $sdrs->set_assay('C8-BMS-208882',
54             1.885, 1.82, 2.2, 2.205, 2.78,
55             4.965, 9.21, 31.275, 74.445, 99.03,
56             100);
57             $sdrs->calculate;
58             foreach my $assay ($sdrs->assays) {
59             print "$assay\n";
60             foreach my $prop (('MAX', 'MIN', 'LOW', 'HIGH', 'EC50',
61             'PVALUE', 'EC50RANGE', 'PEAK', 'A', 'B',
62             'D', 'FOLD')) {
63             printf " %s = %s\n", $prop, $sdrs->ec50data($assay, $prop);
64             }
65             print "\n";
66             }
67              
68             =head2 Constructor
69              
70             $obj = new Bio::SDRS;
71              
72             # You can also use $obj = Bio::SDRS->new();
73              
74             =head2 Object Methods
75              
76             =head3 Input Methods
77              
78             $sdrs->multiple([$new_multiple]);
79             $sdrs->ldose([$new_ldose]);
80             $sdrs->hdose([$new_hdose]);
81             $sdrs->step([$new_step]);
82             $sdrs->maxproc([$new_maxproc]);
83             $sdrs->trim([$new_trim]);
84             $sdrs->significance([$new_significance]);
85             $sdrs->tmpdir([$new_tmpdir]);
86             $sdrs->debug([$new_debug]);
87             $sdrs->doses(doses...);
88             $sdrs->set_assay(assay, {response}...)
89              
90             =head3 Output Methods
91              
92             $sdrs->assays;
93             $sdrs->scandata;
94             $sdrs->score_doses;
95             $sdrs->sorted_assays_by_dose([$dose]);
96             $sdrs->pvalues_by_dose([$dose])
97             $sdrs->ec50data([$assay[, $property[, $precision]]]);
98              
99             =head3 Other Methods
100              
101             $sdrs->calculate;
102              
103             =head1 DESCRIPTION
104              
105             Bio::SDRS implements the Sigmoidal Dose Response Search of assay responses
106             described in the paper by
107              
108             Rui-Ru Ji, Nathan O. Siemers, Lei Ming, Liang Schweizer, and Robert E
109             Bruccoleri.
110              
111             The module is implemented using a simple object oriented paradigm
112             where the object stores all the information needed for a calculation
113             along with a state variable, C. The state variable has three
114             possible values, C<'setup'>, C<'calculating'> and C<'calculated'>. The
115             value of C<'setup'> indicates that the object is being setup with
116             data, and any results in the object are inconsistent with the data.
117             The value of C<'calculating'> indicates the object's computations are
118             in progress and tells the code not to delete intermediate files. This
119             object runs in parallel, and the object destruction code gets called
120             when each thread exits. Intermediate files must be protected at that
121             time. The value of C<'calculated'> indicates that the object's
122             computational results are consistent with the data, and may be
123             returned to a calling program.
124              
125             The C<'calculate'> method is used to update all the calculated values
126             from the input data. It checks the state variable first, and only does
127             the calculation if the state is C<'setup'>. Once the calculations are
128             complete, then the state variable is set to C<'calculated'>. Thus, the
129             C method can be called whenever a calculated value is
130             needed, and there is no performance penalty.
131              
132             The module initializes the C object with a state of
133             C<'setup'>. Any data input sets the state to C<'setup'>. Any requests
134             for calculated data, calls C<'calculate'>, which updates the state
135             variable so futures requests for calculated data return quickly.
136              
137             B This module uses parallel programming via a fork call to get
138             high performance. I
139             calling the C method, and reopen them afterwards. In
140             addition, you must ensure that any automated DESTROY methods take in
141             account their execution when the child processes terminated.>
142              
143             =head1 METHODS
144              
145             The following methods are provided:
146              
147             =over 4
148              
149             =cut
150            
151              
152             =item C
153              
154             Creates a new Bio::SDRS object.
155              
156             =cut
157              
158             sub new {
159 1     1   39 my $pkg;
160 1         3 my $class = shift;
161 1         2 eval {($pkg) = caller(0);};
  1         9  
162 1 50       5 if ($class ne $pkg) {
163 0         0 unshift @_, $class;
164             }
165 1         1 my $self = {};
166 1         2 bless $self;
167 1         5 $self->_init_data;
168 1         2 $self->{MULTIPLE} = 1.18;
169 1         3 $self->{LDOSE} = 0.17;
170 1         2 $self->{HDOSE} = 30000;
171 1         4 $self->{STEP} = 60;
172 1         3 $self->{MAXPROC} = 2;
173 1         3 $self->{TRIM} = 0.0;
174 1         3 $self->{SIGNIFICANCE} = 0.05;
175 1         3 $self->{DEBUG} = 0;
176 1         4 $self->_init_tmp;
177            
178 1         2 return $self;
179             }
180              
181             sub _init_data {
182 1     1   2 my $self = shift;
183 1         11 $self->{STATE} = "setup";
184 1         3 $self->{DOSES} = [];
185 1         3 $self->{RESPONSES} = {};
186             }
187              
188             sub _init_tmp {
189 1     1   2 my $self = shift;
190 1 50       4 my $tmp = exists($ENV{TMPDIR}) ? $ENV{TMPDIR} : "/tmp";
191 1         2 $tmp .= "/sdrs";
192 1 50       6 if (exists($ENV{USER})) {
193 0         0 $tmp .= "." . $ENV{USER};
194             }
195 1         4 $tmp .= "." . $$;
196 1         2 $self->{TMPDIR} = $tmp;
197 1         3 $self->{TMP_CREATED} = 0;
198             }
199              
200             sub DESTROY {
201            
202             # There's an important multiprocessing issue to keep in mind here.
203             # When this process forks, this method will be invoked for all the subprocesses.
204             # Thus, we need to prevent the deletion of the temporary array until the
205             # calculation is completed. That is why there is a check on the STATE.
206            
207 1     1   164 my $self = shift;
208 1 50 33     141 if (not $self->{DEBUG} and
      33        
209             $self->{TMP_CREATED} and
210             $self->{STATE} eq 'calculated') {
211 0         0 &_system_with_check("rm -rf " . $self->{TMPDIR},
212             $self->{DEBUG});
213             }
214             }
215              
216             # documentation for autoloaded methods goes here.
217              
218             =item C
219              
220             Retrieves the current setting for the C value,
221             and optionally sets it. This value specifies the multiplicity factor
222             for increasing the dose in the search. It must be greater than one.
223              
224             =item C
225              
226             Retrieves the current setting for the C value,
227             and optionally sets it. This value specifies the lowest dose in the search.
228             It must be greater than zero.
229              
230             =item C
231              
232             Retrieves the current setting for the C value,
233             and optionally sets it. This value specifies the highest
234             dose in the search. It must be greater than the ldose value.
235              
236             =item C
237              
238             Retrieves the current setting for the C value, and optionally
239             sets it. This value specifies the maximum change in doses in the
240             search. In the search process, this module starts at the ldose
241             value. It tries multiplying the current dose by the C value,
242             but it will only increase the dose by no more than the C value
243             specified here. It must be positive.
244              
245             =item C
246              
247             Retrieves the current setting for the C value,
248             and optionally sets it. This value specifies the maximum number of processes that
249             can be used for the search. The upper limit is 64 and the lower limit is 1.
250              
251             =item C
252              
253             Retrieves the current setting for the C value, and optionally
254             sets it. This value specifies the trimming factor for the number of
255             assays. If the C is 0, then all assays will be used, and if 1,
256             no assays will be used. It must be between 0 and 1.
257              
258             =item C
259              
260             Retrieves the current setting for the C value, and
261             optionally sets it. This value specifies the minimum permitted
262             significance value for the F score cutoff. It must be between zero and
263             1.
264              
265             =item C
266              
267             Retrieves the current setting for the C value, and optionally
268             sets it. This value specifies the temporary directory where scans of
269             the dose calculation are written. The default is
270             /tmp/sdrs.C.C.
271              
272             =item C
273              
274             Retrieves the current setting for the C variable, and
275             optionally sets it. This value specifies whether debugging information
276             is printed and if the temporary directory (see above) is deleted after
277             execution of this module.
278              
279             =cut
280              
281             sub AUTOLOAD {
282 1     1   737 my $self = shift;
283 1         3 my $type = ref($self);
284 1         2 my $pkg = __PACKAGE__;
285 1         2 my ($pos, $new_val);
286              
287 1 50       3 croak "$self is not an object\n" if not $type;
288 1 50       21 croak "$pkg AUTOLOAD function fails on $type\n"
289             if $type !~ m/^$pkg$/;
290 1         9 my $name = uc($AUTOLOAD);
291 1         15 $name =~ s/^.*:://;
292 1 50       5 if (not exists($component{$name})) {
    0          
    0          
293 1         171 croak "$name is not a valid method for $type.\n";
294             }
295             elsif (not exists($self->{$name})) {
296 0           croak "$name is not a valid method for $type\n";
297             }
298             elsif (defined $_[0]) {
299 0           $self->{$name} = shift;
300 0           $self->{STATE} = "setup";
301             }
302 0 0 0       if ($name eq "MULTIPLE" and
303             $self->{MULTIPLE} <= 1.0) {
304 0           croak "multiple parameter must be greater than 1.\n";
305             }
306 0 0 0       if ($name eq "LDOSE" and
307             $self->{LDOSE} <= 0.0) {
308 0           croak "ldose parameter must be positive.\n";
309             }
310 0 0 0       if ($name eq "HDOSE" and
311             $self->{HDOSE} <= $self->{LDOSE}) {
312 0           croak "hdose parameter must be greater than the ldose parameter.\n";
313             }
314 0 0 0       if ($name eq "STEP" and
315             $self->{STEP} <= 0.0) {
316 0           croak "step parameter must be positive.\n";
317             }
318 0 0 0       if ($name eq "MAXPROC" and
      0        
319             ($self->{MAXPROC} > 64 or $self->{MAXPROC} < 1)) {
320 0           croak "maxproc parameter must be between 1 and 64.\n";
321             }
322 0 0 0       if ($name eq "TRIM" and
      0        
323             ($self->{TRIM} < 0.0 or
324             $self->{TRIM} > 1.0)) {
325 0           croak "trim parameter must be between 0 and 1, inclusive\n";
326             }
327 0 0 0       if ($name eq "SIGNIFICANCE" and
      0        
328             ($self->{SIGNIFICANCE} <= 0.0 or
329             $self->{SIGNIFICANCE} >= 1.0)) {
330 0           croak "trim parameter must be between 0 and 1, exclusive\n";
331             }
332 0           return $self->{$name};
333             }
334              
335             =item C
336              
337             Specify the list of compound doses used in the all the assays in the
338             experiment. The doses must be in numerical order, from smallest to
339             largest.
340              
341             =cut
342              
343             sub doses {
344 0     0     my $self = shift;
345 0 0         if (scalar(@_) == 0) {
346 0           return @{$self->{DOSES}};
  0            
347             }
348             else {
349 0           $self->_init_data;
350 0           my $dose1 = shift;
351 0           push(@{$self->{DOSES}}, $dose1);
  0            
352 0           foreach my $dose (@_) {
353 0 0         if ($dose < $dose1) {
354 0           croak "Doses not in order. Current dose is $dose, and previous does is $dose1\n";
355             }
356 0           push(@{$self->{DOSES}}, $dose);
  0            
357 0           $dose1 = $dose;
358             }
359 0           $self->{STATE} = "setup";
360             }
361             }
362              
363             =item C
364              
365             Adds an assay to the list to be searched over. Each response in the
366             list corresponds to the doses specified in the C method. The
367             number must match, and they must be in numerical order, from smallest
368             to largest.
369              
370             =cut
371              
372             sub set_assay {
373 0     0     my $self = shift;
374 0           my $assay = shift;
375              
376 0           $self->{STATE} = "setup";
377 0 0         if (exists($self->{RESPONSES}->{$assay})) {
378 0           croak "Duplicate assay ($assay) response specified.\n";
379             }
380 0           $self->{RESPONSES}->{$assay} = [ @_ ];
381 0 0         if (scalar(@{$self->{DOSES}}) == 0) {
  0            
382 0           croak "You must call the doses method before calling the set_assay method.\n";
383             }
384 0           my $nresp = scalar(@{$self->{RESPONSES}->{$assay}});
  0            
385 0           my $ndoses = scalar(@{$self->{DOSES}});
  0            
386 0 0         if ($nresp != $ndoses) {
387 0           croak "The number of assay responses ($nresp) does not equal the number of doses ($ndoses)\n";
388             }
389             }
390              
391             =item C
392              
393             Return the list of assay names;
394              
395             =cut
396            
397             sub assays {
398 0     0     my $self = shift;
399 0           return keys %{$self->{RESPONSES}};
  0            
400             }
401              
402             =item C
403              
404             Perform the SDRS calculation.
405              
406             =cut
407              
408             sub calculate {
409 0     0     my $self = shift;
410 0 0         return if $self->{STATE} eq 'calculated';
411 0           $self->_make_tmp;
412 0           $self->{STATE} = "calculating";
413             #
414             # Calculate F Test degrees of freedom.
415             #
416 0           my $ndoses = scalar(@{$self->{DOSES}});
  0            
417 0           my $p = 4; #number of parameters
418 0           $self->{FDISTR_N} = $p - 1;
419 0           $self->{FDISTR_M} = $ndoses - $p;
420 0           $self->{CUTOFF} = Statistics::Distributions::fdistr($self->{FDISTR_N},
421             $self->{FDISTR_M},
422             $self->{SIGNIFICANCE});
423              
424 0           my $count = scalar(keys %{$self->{RESPONSES}});
  0            
425 0           foreach my $assay (keys %{$self->{RESPONSES}}) {
  0            
426 0           my @data = @{$self->{RESPONSES}->{$assay}};
  0            
427 0           my ($max, $min) = Math::NumberCruncher::Range(\@data);
428 0           $self->{MAX}->{$assay} = $max;
429 0           $self->{MIN}->{$assay} = $min;
430 0           my $value = _compute_sst(\@data); #total sum of squares
431 0           $self->_define_ab_boundary($assay, \@data);
432 0           $self->{SST}->{$assay} = $value;
433 0           $self->{VALUES}->{$assay} = \@data;
434             }
435 0           my $assayNum = int($count * (1 - $self->{TRIM}));
436 0           my %expressedAssays;
437 0           $count = 0;
438 0           foreach my $g (sort {$self->{MAX}->{$b} <=> $self->{MAX}->{$a}} (keys %{$self->{MAX}})) {
  0            
  0            
439 0 0         last if ($count >= $assayNum);
440 0           $expressedAssays{$g} = 1;
441 0           $count++;
442             }
443              
444 0           my (%pvalues, %sortedprobes, $pid, @pids, $file, $iproc, $err);
445 0           my $tmpdir = $self->{TMPDIR};
446 0           for ($iproc = 0; $iproc < $self->{MAXPROC}; $iproc++) {
447 0 0         if ($pid = fork) {
    0          
448 0 0         print STDERR "Process $pid forked.\n" if $self->{DEBUG};
449 0           push @pids, $pid;
450             }
451             elsif (defined $pid) {
452 0           sleep $iproc;
453 0           $file = "$tmpdir/sdrs.out.$iproc";
454 0 0         open (ECOUT, ">$file") || die "can not open EC50 output file: $!\n";
455             #
456             # The following statement causes the system to write each output line
457             # to disk immediately, rather than waiting for a buffer to fill.
458             #
459 1     1   1160 select((select(ECOUT), $| = 1)[$[]); # Change buffering to line by line.
  1         460  
  1         3258  
  0            
460 0           my $last_dose = $self->{LDOSE};
461 0           my $icount = 0;
462 0           for (my $dose = $self->{LDOSE};
463             $dose < $self->{HDOSE} * $self->{MULTIPLE};
464             $dose *= $self->{MULTIPLE}) {
465 0 0         if ($dose - $last_dose > $self->{STEP}) {
466 0           $dose = $last_dose + $self->{STEP};
467             }
468 0 0         if (($icount++ % $self->{MAXPROC}) == $iproc) {
469 0           print ECOUT $self->_scan_dose_point($dose);
470             }
471 0           $last_dose = $dose;
472             }
473 0           close ECOUT;
474 0           exit 0;
475             }
476             else {
477 0           die "Can't fork: $!\n";
478             }
479             }
480 0           $err = 0;
481 0           foreach $pid (@pids) {
482 0           waitpid($pid, 0);
483 0 0         if ($? != 0) {
484 0           print STDERR "Process $pid return status was $?\n";
485 0           $err = 1;
486             }
487             }
488            
489             # Now collect all the outputs together.
490 0           %{$self->{GSCORE}} = ();
  0            
491 0           %{$self->{GPARAM}} = ();
  0            
492 0           %{$self->{FSCORES}} = ();
  0            
493 0           my $multiple = $self->{MULTIPLE};
494 0           my $step = $self->{STEP};
495 0           @{$self->{SCANDATA}} = ();
  0            
496            
497 0           for ($iproc = 0; $iproc < $self->{MAXPROC}; $iproc++) {
498 0           my $infile = "$tmpdir/sdrs.out.$iproc";
499 0 0         open (IN, "<$infile") || die "can not open input file $infile: $!\n";
500 0           while () {
501 0           push (@{$self->{SCANDATA}}, $_);
  0            
502 0           chomp;
503 0           my ($assay, $dose, $fscore, $a, $b, $d) = split (/\t/, $_);
504 0           $self->{GSCORE}->{$assay}->{$dose} = $fscore;
505 0           $self->{GPARAM}->{$assay}->{$dose} = "$a:$b:$d";
506 0 0         if (defined $expressedAssays{$assay}) {
507 0           $self->{FSCORES}->{$dose}->{$assay} = $fscore;
508             }
509             }
510 0           close IN;
511             }
512            
513             #sort probesets based on F scores for every selected doses, output P values for FDR calculation
514 0           %{$self->{SORTED_DATA}} = ();
  0            
515 0           %{$self->{PVAL_DATA}} = ();
  0            
516 0           foreach my $dose (keys %{$self->{FSCORES}}) {
  0            
517 0           my %fs = %{$self->{FSCORES}->{$dose}};
  0            
518 0           foreach my $assay (sort {$fs{$b} <=> $fs{$a}} (keys %fs)) {
  0            
519 0           push (@{$self->{SORTED_DATA}->{$dose}}, $assay);
  0            
520 0           my $pvalue = Statistics::Distributions::fprob($self->{FDISTR_N},
521             $self->{FDISTR_M},
522             $fs{$assay});
523 0           push (@{$self->{PVAL_DATA}->{$dose}}, $pvalue);
  0            
524             }
525             }
526            
527             #calculates EC50 for every probeset
528 0           %{$self->{EC50DATA}} = ();
  0            
529 0           foreach my $assay (keys %{$self->{GPARAM}}) {
  0            
530 0           my (%fscore, %param);
531 0           %fscore = %{$self->{GSCORE}->{$assay}};
  0            
532 0           %param = %{$self->{GPARAM}->{$assay}};
  0            
533              
534 0           my @sorted_list = sort {$a<=>$b} (values %fscore);
  0            
535 0           my $max = pop @sorted_list;
536 0           my $min = shift @sorted_list;
537 0           @sorted_list = sort {$fscore{$a}<=>$fscore{$b}} (keys %fscore);
  0            
538 0           my $ec50 = pop @sorted_list; #highest f score
539 0           my ($range, $high, $low, $peak, $ec50range, $fold);
540 0 0         if ($max >= $self->{CUTOFF}) {
541 0           $range = $self->_find_ec50_range(\%fscore, $ec50);
542 0           ($high, $low, $peak) = split (/\-/, $range);
543 0           $ec50range = $high - $low;
544             } else {
545 0           $high = $low = $peak = $ec50range = '';
546             }
547 0           my $pvalue = Statistics::Distributions::fprob($self->{FDISTR_N},
548             $self->{FDISTR_M},
549             $max);
550 0           my ($a, $b, $d) = split (/\:/, $param{$ec50});
551 0 0         if ($d >= 0) {
552 0 0         if ($a != 0) {
553 0           $fold = -($b/$a);
554             } else {
555 0           $fold = -99999;
556             }
557             } else {
558 0 0         if ($a != 0) {
559 0           $fold = $b/$a;
560             } else {
561 0           $fold = 99999;
562             }
563             }
564 0           $self->{EC50DATA}->{$assay} = { MAX => $max,
565             MIN => $min,
566             LOW => $low,
567             HIGH => $high,
568             EC50 => $ec50,
569             PVALUE => $pvalue,
570             EC50RANGE => $ec50range,
571             PEAK => $peak,
572             A => $a,
573             B => $b,
574             D => $d,
575             FOLD => $fold };
576             }
577 0           $self->{STATE} = "calculated";
578             }
579              
580             =item C
581              
582             Return the complete list of scan data used in the SDRS calculation. This is just an array of lines containing the values of the EC50 calculations.
583              
584             =cut
585              
586             sub scandata {
587 0     0     my $self = shift;
588 0           return @{$self->{SCANDATA}};
  0            
589             }
590              
591             =item C
592              
593             Return the list of doses used in the SDRS calculation that can be
594             used as arguments for assays and pvalues.
595              
596             =cut
597              
598             sub score_doses {
599 0     0     my $self = shift;
600 0           return keys %{$self->{FSCORES}};
  0            
601             }
602              
603             =item C
604              
605             Return a list of assays for each dose in $dose sorted by F-score.
606              
607             =cut
608            
609             sub sorted_assays_by_dose {
610 0     0     my $self = shift;
611 0           my $dose = shift;
612 0 0         if ($self->{STATE} ne 'calculated') {
613 0           $self->calculate;
614             }
615              
616 0 0         if (not defined $dose) {
617 0           return %{$self->{SORTED_DATA}};
  0            
618             }
619             else {
620 0 0         if (not exists($self->{SORTED_DATA}->{$dose})) {
621 0           carp "Sorted assays by dose = $dose does not exist.\n";
622 0           return undef;
623             }
624             else {
625 0           return @{$self->{SORTED_DATA}->{$dose}};
  0            
626             }
627             }
628             }
629            
630              
631             =item C
632              
633             Return a list of P values for the assays returned by sorted_assays_by_dose
634             for each dose in $dose sorted by F-score.
635              
636             =cut
637            
638             sub pvalues_by_dose {
639 0     0     my $self = shift;
640 0           my $dose = shift;
641 0 0         if ($self->{STATE} ne 'calculated') {
642 0           $self->calculate;
643             }
644              
645 0 0         if (not defined $dose) {
646 0           return %{$self->{PVAL_DATA}};
  0            
647             }
648             else {
649 0 0         if (not exists($self->{PVAL_DATA}->{$dose})) {
650 0           carp "P values by dose = $dose does not exist.\n";
651 0           return undef;
652             }
653             else {
654 0           return @{$self->{PVAL_DATA}->{$dose}};
  0            
655             }
656             }
657             }
658              
659             =item C
660              
661             Returns EC50 data for the calculation. If no arguments are specified,
662             then the internal hash for the EC50 calculation are returned. If just
663             an C<$assay> is specified, then the internal hash for all the EC50
664             values associated with that C<$assay> is returned. If an C<$assay> and
665             C<$property> are specified, then the property for that assay is
666             returned. If the C<$precision> operand is specified, then it controls
667             how many digits of precision are returned for the value.
668              
669             Here is the list of possible properties.
670              
671             MAX Maximum F score
672             MIN Minimum F score
673             LOW Lower bound of 95% confidence interval for the estimated EC50.
674             HIGH Upper bound of 95% confidence interval for the estimated EC50.
675             EC50 Estimated EC50.
676             PVALUE P value of the best fitting model
677             EC50RANGE range of 95% confidence interval for the estimated EC50.
678             PEAK Number of peaks in the F scores at search doses across experimental dose range.
679             A Estimated value for A in the best model.
680             B Estimated value for B in the best model.
681             D Estimated value for D in the best model.
682             FOLD Ratio of B/A or 99999.0. If A == 0. Positive if D < 0, negative otherwise.
683              
684             =cut
685              
686             sub ec50data {
687 0     0     my $self = shift;
688 0           my $assay = shift;
689 0           my $property = shift;
690 0           my $precision = shift;
691              
692 0 0         $precision = 6 if not defined $precision;
693 0 0         if ($self->{STATE} ne 'calculated') {
694 0           $self->calculate;
695             }
696 0 0         if (not defined $assay) {
697 0           return %{$self->{EC50DATA}};
  0            
698             }
699             else {
700 0 0         if (not exists($self->{EC50DATA}->{$assay})) {
701 0           carp "EC50 data for $assay does not exist.\n";
702 0           return undef;
703             }
704 0           my $assaydata = $self->{EC50DATA}->{$assay};
705 0 0         if (not defined $property) {
706 0           return %{$self->{EC50DATA}->{$assay}};
  0            
707             }
708             else {
709 0 0         if (not exists($assaydata->{$property})) {
710 0           carp "EC50 data for property $property for assay $assay does not exist.\n";
711 0           return undef;
712             }
713             else {
714 0           my $ret = $assaydata->{$property};
715 0 0 0       if ($ret ne "" and $ret != int($ret)) {
716 0           $ret = sprintf("%.${precision}g", $ret) + 0.0;
717             }
718 0           return $ret;
719             }
720             }
721             }
722             }
723              
724             sub _make_tmp {
725 0     0     my $self = shift;
726 0 0 0       if (-d $self->{TMPDIR} and -w $self->{TMPDIR}) {
727 0           return;
728             }
729 0 0         if (-d $self->{TMPDIR}) {
730 0           croak "Unable to write " . $self->{TMPDIR} . "\n";
731             }
732 0 0         if (-w $self->{TMPDIR}) {
733 0 0         unlink($self->{TMPDIR}) ||
734             croak "Unable to delete " . $self->{TMPDIR} . "\n";
735             }
736 0           &_system_with_check("mkdir -p " . $self->{TMPDIR},
737             $self->{DEBUG});
738 0           $self->{TMP_CREATED} = 1;
739             }
740              
741             sub _find_ec50_range {
742 0     0     my $self = shift;
743 0           my ($scores, $ec50) = @_;
744 0           my @range = ();
745 0           my $off_flag = 1;
746 0           my $peak = 0;
747 0           my $current;
748 0           my $seen = 0;
749 0           my $boundary;
750 0           foreach my $dose (sort {$a<=>$b} keys %$scores) {
  0            
751 0           $current = $$scores{$dose};
752 0 0         $seen = 1 if ($dose == $ec50);
753 0 0         if ($current >= $self->{CUTOFF}) {
754 0           push @range, $dose;
755 0 0         $off_flag = 0 if ($off_flag == 1);
756             } else {
757 0 0         if ($seen == 1) {
758 0           $boundary = _find_boundary(\@range);
759 0           $seen = 0;
760             }
761 0           @range = ();
762 0 0         if ($off_flag == 0) {
763 0           $off_flag = 1;
764 0           $peak++;
765             }
766             }
767             }
768 0 0         $boundary = _find_boundary(\@range) if ($seen == 1);
769 0 0         $peak++ if ($current >= $self->{CUTOFF});
770            
771 0           return "$boundary-$peak";
772             }
773            
774             sub _find_boundary {
775 0     0     my $range = shift;
776 0           my ($high, $low);
777              
778 0 0         if (@$range >= 2) {
779 0           $low = shift @$range;
780 0           $high = pop @$range;
781             } else {
782 0           $low = shift @$range;
783 0           $high = $low;
784             }
785 0           return "$high-$low";
786             }
787              
788             sub _scan_dose_point {
789 0     0     my $self = shift;
790 0           my $dose = shift;
791 0           my $ecstring = '';
792 0           my $m = $self->{FDISTR_M};
793 0           my $n = $self->{FDISTR_N};
794 0           foreach my $assay (keys %{$self->{VALUES}}) {
  0            
795 0           my ($al, $ah, $astep) = split (/\:/, $self->{ARANGE}->{$assay});
796 0           my ($bl, $bh, $bstep) = split (/\:/, $self->{BRANGE}->{$assay});
797 0           my ($a, $b, $d, $best_sse) = _compute_best_sse($al, $ah, $astep,
798             $bl, $bh, $bstep,
799             $dose,
800             $self->{VALUES}->{$assay},
801             $self->{DOSES});
802 0           my $f_score = ($self->{SST}->{$assay} - $best_sse) * $m / ($best_sse * $n);
803 0           $f_score = sprintf("%.3f", $f_score);
804 0           $a = sprintf("%.3f", $a);
805 0           $b = sprintf("%.3f", $b);
806 0           $d = sprintf("%.3f", $d);
807 0           $ecstring .= "$assay\t" .
808             sprintf("%.5f", $dose) .
809             "\t$f_score\t$a\t$b\t$d\n";
810             }
811 0           return $ecstring;
812             }
813              
814             sub _define_ab_boundary {
815 0     0     my $self = shift;
816 0           my ($assay, $data) = @_;
817 0           my @sorted = sort {$a<=>$b} @$data;
  0            
818 0           my @sorted_copy = @sorted;
819 0           my @brange = splice (@sorted, $#sorted-5, 6);
820 0           $self->_find_step($assay, 'b', \@brange);
821 0           my @arange = splice (@sorted_copy, 0, 6);
822 0           $self->_find_step($assay, 'a', \@arange);
823             }
824              
825             sub _find_step {
826              
827 0     0     my $self = shift;
828 0           my ($assay, $pam, $data) = @_;
829 0           my $stdev = Math::NumberCruncher::StandardDeviation($data);
830 0           my $mean = Math::NumberCruncher::Mean($data);
831 0           my $cutoff = $mean / 10;
832 0 0 0       $stdev = $cutoff if ($stdev eq 'NaN' || $stdev < $cutoff);
833 0           my ($l, $h, $step);
834 0           $step = $stdev / 2.5;
835 0           $step = sprintf("%.3f", $step);
836 0 0         if ($pam eq 'a') {
    0          
837 0           $h = $mean + 2.3*$stdev;
838 0           $l = $mean - 2*$stdev;
839 0 0         $l = $self->{MIN}->{$assay} if ($l <= 0);
840 0           $h = sprintf("%.3f", $h);
841 0           $l = sprintf("%.3f", $l);
842 0           $self->{ARANGE}->{$assay} = "$l:$h:$step";
843             } elsif ($pam eq 'b') {
844 0           $h = $mean + 2*$stdev;
845 0           $l = $mean - 2.3*$stdev;
846 0 0         $l = $self->{MIN}->{$assay} if ($l <= 0);
847 0           $h = sprintf("%.3f", $h);
848 0           $l = sprintf("%.3f", $l);
849 0           $self->{BRANGE}->{$assay} = "$l:$h:$step";
850             }
851             }
852              
853             sub _compute_sst {
854 0     0     my $data = shift;
855 0           my $sst = 0;
856 0           my $mean = Math::NumberCruncher::Mean($data);
857            
858 0           foreach my $data (@$data) {
859 0           $sst += ($data - $mean)**2;
860             }
861 0           return $sst;
862             }
863              
864             sub _system_with_check {
865              
866 0     0     my $command = shift;
867 0           my $echo = shift;
868              
869 0 0 0       if (defined $echo and $echo) {
870 0           &_dated_mesg($command);
871             }
872 0           my $status = system("$command");
873 0 0         if ($status != 0) {
874 0           croak "Shell command: $command returned $status error code.\n";
875             }
876             }
877              
878             sub _dated_mesg {
879              
880             # Print a dated message to STDERR
881 0     0     my $mesg = shift;
882              
883 0           my $date = &_date;
884 0 0         if (substr($mesg, -1, 1) ne "\n") {
885 0           $mesg .= "\n";
886             }
887 0           print STDERR "At $date: $mesg";
888             }
889              
890             sub _date {
891 0     0     return strftime("%a %b %d %T %Z %Y", localtime);
892             }
893              
894             =back
895              
896             =head1 SEE ALSO
897              
898             sdrs.pl
899              
900             =head1 AUTHORS
901              
902             Ruiru Ji
903             Nathan O. Siemers
904             Lei Ming
905             Liang Schweizer
906             Robert Bruccoleri
907              
908             =cut
909              
910            
911 1     1   1492 use Inline C => <<'END_OF_C_CODE', NAME => 'Bio::SDRS::FastSSE';
  1         25673  
  1         9  
912              
913             void extract_double_array(SV *arrayp,
914             double a[],
915             I32 asize,
916             I32 *limitp);
917              
918             void _compute_best_sse(double al,
919             double ah,
920             double astep,
921             double bl,
922             double bh,
923             double bstep,
924             double dose,
925             SV* valuesp,
926             SV* dosesp)
927             {
928             double a, b, d, diff, sse, tmp, y_hat, d2;
929             double a_best, b_best, d_best, sse_best;
930             SV* param_ret;
931             SV* best_sse_ret;
932             I32 count = 0;
933             I32 i;
934             I32 vlimit, dlimit;
935             #define DOSE_SIZE 100
936             double doses[DOSE_SIZE];
937             double values[DOSE_SIZE];
938             Inline_Stack_Vars;
939              
940             extract_double_array(valuesp, values, DOSE_SIZE, &vlimit);
941             extract_double_array(dosesp, doses, DOSE_SIZE, &dlimit);
942             if (vlimit != dlimit) {
943             fprintf(stderr, "Error in compute_best_sse: limit mismatch. vlimit = %d dlimit = %s\n",
944             vlimit, dlimit);
945             exit(1);
946             }
947            
948             for (a = al; a < ah; a += astep) {
949             for (b = bh; b > bl && a <= b; b -= bstep) {
950             diff = b - a;
951             for (d = -6; d < 6.3; d += 0.3) {
952             sse = 0;
953             for (i = 0; i <= dlimit; i++) {
954             tmp = doses[i] / dose;
955             tmp = pow(tmp, d);
956             y_hat = a + diff / (1 + tmp);
957             d2 = (y_hat - values[i]);
958             sse += d2 * d2;
959             }
960             if (++count == 1 || sse < sse_best) {
961             a_best = a;
962             b_best = b;
963             d_best = d;
964             sse_best = sse;
965             }
966             }
967             }
968             }
969             Inline_Stack_Reset;
970             Inline_Stack_Push(sv_2mortal(newSVnv(a_best)));
971             Inline_Stack_Push(sv_2mortal(newSVnv(b_best)));
972             Inline_Stack_Push(sv_2mortal(newSVnv(d_best)));
973             Inline_Stack_Push(sv_2mortal(newSVnv(sse_best)));
974             Inline_Stack_Done;
975             }
976              
977             void extract_double_array(SV *arrayp,
978             double a[],
979             I32 asize,
980             I32 *limitp)
981             /*
982             * Extract the elements of the Perl array, pointed to by arrayp,
983             * into the C array a. The size of a is asize, and the code will check
984             * for overflow. The limiting subscript for arrayp will be returned in
985             * *limitp. */
986              
987             {
988             AV* array;
989             I32 limit, i;
990              
991             array = SvRV(arrayp);
992             limit = av_len(array);
993             if (limit+1 > asize) {
994             fprintf(stderr,
995             "extract_double_array: Perl array is too big. limit = %d asize = %d\n",
996             limit, asize);
997             exit(1);
998             }
999             for (i = 0; i <= limit; i++) {
1000             SV *element, **elementp;
1001             elementp = av_fetch(array, i, 0);
1002             element = *elementp;
1003             a[i] = SvNV(element);
1004             }
1005             *limitp = limit;
1006             }
1007              
1008             END_OF_C_CODE