File Coverage

Bio/Prospect/LocalClient.pm
Criterion Covered Total %
statement 40 213 18.7
branch 1 82 1.2
condition 0 20 0.0
subroutine 14 27 51.8
pod 4 6 66.6
total 59 348 16.9


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Bio::Prospect::LocalClient -- execute Prospect locally
4             $Id: LocalClient.pm,v 1.32 2003/11/18 19:45:45 rkh Exp $
5              
6             =head1 SYNOPSIS
7              
8             my $in = new Bio::SeqIO( -format=> 'Fasta', '-file' => $ARGV[0] );
9             my $po = new Bio::Prospect::Options( seq=>1, svm=>1, global_local=>1,
10             templates=>['1alu', '1bgc','1eera']);
11             my $pf = new Bio::Prospect::LocalClient( {options=>$po} );
12            
13             while ( my $s = $in->next_seq() ) {
14             my @threads = $pf->thread( $s );
15             }
16              
17             =head1 DESCRIPTION
18              
19             B<Bio::Prospect::LocalClient> runs Prospect locally. It is intended to be
20             used to facilitate high-throughput protein sequence threading and as the
21             server-side component of B<Bio::Prospect::SoapClient>, with which it is API
22             compatible.
23              
24             =head1 ROUTINES & METHODS
25              
26             =cut
27              
28              
29             package Bio::Prospect::LocalClient;
30              
31 1     1   835 use base Bio::Prospect::Client;
  1         1  
  1         753  
32              
33 1     1   6 use warnings;
  1         2  
  1         30  
34 1     1   5 use strict;
  1         2  
  1         35  
35 1     1   5 use File::Temp qw( tempfile tempdir );
  1         1  
  1         66  
36 1     1   5 use Carp qw(cluck);
  1         1  
  1         51  
37 1     1   952 use IO::File;
  1         993  
  1         149  
38 1     1   848 use Bio::Prospect::Exceptions;
  1         5  
  1         33  
39 1     1   986 use Bio::Prospect::utilities;
  1         7  
  1         41  
40 1     1   930 use Bio::Prospect::ThreadSummary;
  1         6  
  1         34  
41 1     1   802 use Bio::Prospect::Init;
  1         4  
  1         25  
42 1     1   6 use Digest::MD5;
  1         2  
  1         38  
43 1     1   5 use vars qw( $VERSION );
  1         1  
  1         2932  
44             $VERSION = sprintf( "%d.%02d", q$Revision: 1.32 $ =~ /(\d+)\.(\d+)/ );
45              
46              
47              
48             #-------------------------------------------------------------------------------
49             # new()
50             #-------------------------------------------------------------------------------
51              
52             =head2 new()
53              
54             Name: new()
55             Purpose: constructor
56             Arguments: hash reference with following key/value pairs
57             options => Bio::Prospect::Options object (required)
58             Returns: Bio::Prospect::LocalClient
59              
60             =cut
61              
62              
# Constructor.  Object creation is delegated to the parent class; the new
# object then has its environment checked (_setenv), its Prospect command
# line built (_prepare_options) and the names of its xml and sort file
# caches recorded.  Returns the new object.
sub new(;%) {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);

  $self->_setenv();
  $self->_prepare_options();

  # names of the xml file cache and the sort file cache, respectively
  @{$self}{ 'xmlCacheName', 'sortCacheName' } = ( 'xmlCache', 'sortCache' );

  return $self;
}
71              
72              
73             #-------------------------------------------------------------------------------
74             # thread()
75             #-------------------------------------------------------------------------------
76              
77             =head2 thread()
78              
79             Name: thread()
80             Purpose: return a list of Thread objects
81             Arguments: scalar sequence or Bio::PrimarySeqI-derived object
82             Returns: list of Bio::Prospect::Thread objects
83              
84             =cut
85              
# thread( $seq ) -- thread one sequence and return the resulting threads.
#
# Arguments: a plain sequence string, or an object derived from
#            Bio::PrimarySeqI (its seq() method supplies the sequence).
# Returns:   the list of objects produced by Bio::Prospect::File's
#            next_thread(); the same list is kept in $self->{'threads'}.
# Throws:    Bio::Prospect::BadUsage when the argument is undefined or is a
#            reference that is not a Bio::PrimarySeqI object;
#            Bio::Prospect::RuntimeError when the xml output can't be opened.
sub thread($$) {
  my ($self,$s) = @_;

  # Scalar::Util is a core module.  blessed() lets us reject unblessed
  # references (e.g. an array ref) with BadUsage; calling ->isa on them
  # directly would die with a raw "unblessed reference" error instead.
  require Scalar::Util;

  my $usable = defined $s
    && ( !ref $s
         || ( Scalar::Util::blessed($s) && $s->isa('Bio::PrimarySeqI') ) );
  if ( not $usable ) {
    throw Bio::Prospect::BadUsage(
      "Bio::Prospect::LocalClient::thread() requires one Bio::PrimarySeqI subclass or " .
      "scalar sequence argument" );
  }

  my $seq = ref $s ? $s->seq() : $s;
  my $xfn = $self->_thread_to_file( $seq );

  # Arrow syntax on purpose: this package defines its own new(), so the
  # indirect form "new Bio::Prospect::File" depends on parser heuristics.
  my $pf = Bio::Prospect::File->new();
  $pf->open( "<$xfn" ) || throw Bio::Prospect::RuntimeError("$xfn: $!\n");

  $self->{'threads'} = [];
  while( my $t = $pf->next_thread() ) {
    push @{$self->{'threads'}}, $t;
  }
  return( @{$self->{'threads'}} );
}
106              
107              
108             #-------------------------------------------------------------------------------
109             # thread_summary()
110             #-------------------------------------------------------------------------------
111              
112             =head2 thread_summary()
113              
114             Name: thread_summary()
115             Purpose: return a list of ThreadSummary objects
116             Arguments: Bio::Seq object
117             Returns: list of Bio::Prospect::ThreadSummary objects
118              
119             =cut
120              
# thread_summary( $seq ) -- thread the sequence via thread() and wrap each
# resulting thread in a Bio::Prospect::ThreadSummary.  Returns the list of
# summaries, in the order thread() produced the threads.
sub thread_summary($$) {
  my ($self,$s) = @_;

  my @summary = map { Bio::Prospect::ThreadSummary->new( $_ ) }
                    $self->thread($s);

  return( @summary );
}
130              
131              
132             #-------------------------------------------------------------------------------
133             # xml()
134             #-------------------------------------------------------------------------------
135              
136             =head2 xml()
137              
138             Name: xml()
139             Purpose: return xml string
140             Arguments: Bio::Seq object
141             Returns: string
142              
143             =cut
144              
# xml( $seq ) -- thread the sequence (through _thread_to_file) and return
# the complete contents of the resulting xml file as one string.
# Throws Bio::Prospect::RuntimeError if that file cannot be opened.
sub xml($$) {
  my ($self,$s) = @_;

  my $xfn = $self->_thread_to_file( $s );
  my $in  = IO::File->new( "<$xfn" );
  throw Bio::Prospect::RuntimeError( "can't open $xfn for reading") unless $in;

  # accumulate the file line by line; an empty file yields ''
  my $xml = '';
  $xml .= $_ while <$in>;

  return( $xml );
}
154              
155              
156             #-------------------------------------------------------------------------------
157             # DEPRECATED METHODS - will be removed in subsequent releases.
158             #-------------------------------------------------------------------------------
159              
# DEPRECATED.  Emits a deprecation warning with a stack trace, threads the
# sequence through the (also deprecated) thread_to_file() and hands the
# resulting filename to Bio::Prospect::utilities::score_summary().
sub score_summary($$) {
  cluck("This function is deprecated on Oct-23-2003:\n");

  my $self = $_[0];
  my $xfn  = $self->thread_to_file( $_[1] );

  return Bio::Prospect::utilities::score_summary( $xfn );
}
166              
# DEPRECATED public alias for _thread_to_file().  Emits a deprecation
# warning with a stack trace, then forwards its first two arguments.
sub thread_to_file($$) {
  cluck("This function is deprecated on Oct-23-2003:\n");

  my ($self, $s) = @_;
  return _thread_to_file( $self, $s );
}
171              
172              
173             #-------------------------------------------------------------------------------
174             # INTERNAL METHODS: not intended for use outside this module
175             #-------------------------------------------------------------------------------
176              
177             =pod
178              
179             =head1 INTERNAL METHODS & ROUTINES
180              
181             The following functions are documented for developers' benefit. THESE
182             SHOULD NOT BE CALLED OUTSIDE OF THIS MODULE. YOU'VE BEEN WARNED.
183              
184             =cut
185              
186              
187             #-------------------------------------------------------------------------------
188             # _get_svm_scores()
189             #-------------------------------------------------------------------------------
190              
191             =head2 _get_svm_scores()
192              
193             Name: _get_svm_scores()
194             Purpose: return a hash of svm scores from a prospect sort file
195             Arguments: sort filename
196             Returns: hash
197              
198             =cut
199              
# _get_svm_scores( $sort_filename ) -- read a sortProspect output file.
#
# Every line that does not start with ":Protein" is split on whitespace;
# field 0 becomes the hash key and field 3 the value.
#
# Returns: hash (as a list) mapping field 0 to field 3.
# Throws:  Bio::Prospect::RuntimeError if the file can't be opened, or if
#          no usable line was found in it.
sub _get_svm_scores($$) {
  my ($self,$fn) = @_;
  my %retval;

  # Low-precedence "or" so the throw is tied to the constructor call.  With
  # "||" the test applied to $fn itself and an open failure went unnoticed.
  my $in = IO::File->new( $fn )
    or throw Bio::Prospect::RuntimeError( "can't open $fn for reading" );

  while ( my $line = <$in> ) {
    next if $line =~ m/^:Protein/;
    my @fld = split /\s+/, $line;
    # Blank or truncated lines carry no score.  Skipping them keeps a bogus
    # '' key from masking the empty-file check below.
    next if @fld < 4;
    $retval{$fld[0]} = $fld[3];
  }
  close($in);

  if ( scalar (keys %retval) == 0 ) {
    throw Bio::Prospect::RuntimeError
      ( 'Sort file is empty',
        "The sort file for this sequence is empty.  sortProspect likely failed!",
        "Execute sortProspect on the command-line and check output messages.  sortProspect " .
        "can fail because of erroneous characters in the output xml file (e.g. null character)."
      );
  }
  return %retval;
}
222              
223              
sub _thread_to_file($$)
{
  my ($self,$s) = @_;
  my $seq = ref $s ? $s->seq() : $s;

  # Cache entries are keyed on the MD5 hex digest of the sequence.
  my $digest = Digest::MD5::md5_hex( $seq );

  # A file already cached for this sequence is returned as-is, without
  # running prospect again.
  my $cached = $self->_get_cache_file( $digest, $self->{'xmlCacheName'} );
  if ( defined $cached and -e $cached ) {
    warn("retrieved cache threading info $cached\n") if $ENV{DEBUG};
    return $cached;
  }

  # Thread the sequence; the temporary input file is removed afterwards.
  my $ifn = $self->_write_seqfile( $seq );
  my $xfn = $self->_thread1( $ifn );
  unlink( $ifn );

  # Newer versions of prospect write the svm score while threading, so
  # sortProspect is only needed (for backwards compatibility) when the xml
  # file has no svmScore tag.
  my $result;
  if ( $self->_hasSvmScore( $xfn ) ) {
    print(STDERR "xml file ($xfn) already contains svm scores - skip sortProspect step\n") if $ENV{DEBUG};
    $result = $xfn;
  } else {
    print(STDERR "xml file ($xfn) doesn't contain svm scores - run sortProspect\n") if $ENV{DEBUG};

    # obtain the svm scores, merge them into a new xml file, then drop the
    # two intermediate files
    my $sfn = $self->_sort1( $xfn );
    $result = $self->_output_svm_score( $xfn, $sfn );
    unlink( $xfn );
    unlink( $sfn );
  }

  # remember the final xml file for this sequence
  $self->_put_cache_file( $digest, $self->{'xmlCacheName'}, $result );
  return $result;

=pod

=over

=item B<::_thread_to_file( Bio::Seq | scalar )>

Thread one sequence, given either as an object with a seq() method or as a
scalar string.  The xml output filename is returned.  Threading results are
cached under the MD5 digest of the sequence.  See also B<::thread>.

=back

=cut
}
280              
281              
282              
sub _thread1($$)
{
  my ($self,$ifn) = @_;
  my $xfn = "$ifn.xml";

  # Elements 1 and 2 of the stored command line are sprintf templates for
  # the input and output filenames (see _prepare_options).
  my @cl = @{$self->{commandline}};
  $cl[1] = sprintf($cl[1],$ifn);
  $cl[2] = sprintf($cl[2],$xfn);
  print(STDERR "about to @cl\n") if $ENV{DEBUG};

  if ( system("@cl") != 0 ) {
    my ($s, $err) = ($?, "$!");

    # -1 means the command could not be started at all.  It must be tested
    # before the signal bits, because -1 & 127 would read as "signal 127".
    if ($s == -1) {
      throw Bio::Prospect::RuntimeError
        ( 'failed to execute Prospect',
          "system(@cl) could not be started: $err",
          'check your prospect installation manually' );
    }
    if ($s & 127) {
      $s &= 127;
      my $sn = Bio::Prospect::utilities::signame($s);
      throw Bio::Prospect::RuntimeError
        ( 'failed to execute Prospect',
          "received signal $s ($sn)" );
    }
    $s >>= 8;
    throw Bio::Prospect::RuntimeError
      ( 'failed to execute Prospect',
        "system(@cl) exited with status $s",
        'check your prospect installation manually' );
  }

  my $fh = IO::File->new();
  $fh->open("<$xfn")
    || throw Bio::Prospect::Exception("Prospect failed",
                                      "prospect completed but didn't create an output file");

  # ugh-prospect sometimes barfs and completes with status 0 (e.g., large
  # sequences), so only output containing a <scoreInfo> tag is accepted.
  while(<$fh>) {
    if (m/<scoreInfo>/) {
      $fh->close();
      return $xfn;
    }
  }
  $fh->close();

  throw Bio::Prospect::Exception("Prospect failed",
                                 "prospect completed but the output wasn't valid",
                                 "prospect may fail if the sequence is "
                                 ."too large or there's not enough memory.  Try "
                                 ."running the sequence manually.");

=pod

=over

=item B<::_thread1( filename )>

Threads the fasta-formatted sequence in C<filename> which is passed
directly to prospect.  The name of a temporary file which contains the raw
xml output is returned.  This method will work with multiple sequences in
C<filename>, but other routines in this module will not understand
multi-query xml output reliably.  Most callers should use thread()
instead.

Throws Bio::Prospect::RuntimeError when the command cannot be started, is
killed by a signal or exits non-zero, and Bio::Prospect::Exception when the
output file is missing or contains no C<E<lt>scoreInfoE<gt>> tag.

=back

=cut
}
337              
338              
339             #-------------------------------------------------------------------------------
340             # _hasSvmScore()
341             #-------------------------------------------------------------------------------
342              
343             =head2 _hasSvmScore()
344              
345             Name: _hasSvmScore()
346             Purpose: check whether the prospect xml file already contains a svmScore tag
347             Arguments: prospect xml file
348             Returns: 1 (has svm score) or 0 (no svm score)
349              
350             =cut
351              
# _hasSvmScore( $xmlFile ) -- scan the file line by line and report whether
# any line contains the text "svmScore".  Returns 1 if so, 0 otherwise.
# Throws Bio::Prospect::RuntimeError if the file cannot be opened.
sub _hasSvmScore {
  my ($self,$xmlFile) = @_;

  my $in = IO::File->new( "$xmlFile" );
  throw Bio::Prospect::RuntimeError("can't open $xmlFile for reading") unless $in;

  my $found = 0;
  while(<$in>) {
    next if index( $_, 'svmScore' ) < 0;
    $found = 1;     # first hit is enough; stop reading
    last;
  }
  $in->close();

  return $found;
}
366              
367              
368             #-------------------------------------------------------------------------------
369             # _output_svm_score()
370             #-------------------------------------------------------------------------------
371              
372             =head2 _output_svm_score()
373              
374             Name: _output_svm_score()
375             Purpose: output the svm score in the prospect output file
376             Arguments: prospect xml file, prospect sort file
377             Returns: prospect xml file with svm score
378              
379             =cut
380              
# _output_svm_score( $xmlFile, $sortFile ) -- write a copy of the xml file
# with an <svmScore> element inserted after every <rawScore> element.
#
# The xml file is read in records terminated by '</threading>'.  Records
# that do not contain the text "threading" are not copied.  For each other
# record the template="..." attribute selects the score, which comes from
# _get_svm_scores( $sortFile ).
#
# Returns: name of the new file ("$xmlFile.svm").
# Throws:  Bio::Prospect::RuntimeError if a file can't be opened or
#          written, if a record has no template attribute, or if there is
#          no score for a record's template.
sub _output_svm_score {
  my ($self,$xmlFile,$sortFile) = @_;

  my %svm = $self->_get_svm_scores( $sortFile );

  my $outFile = "$xmlFile.svm";
  my $in = IO::File->new( "$xmlFile" )
    or throw Bio::Prospect::RuntimeError("can't open $xmlFile for reading");
  my $out = IO::File->new( ">$outFile" )
    or throw Bio::Prospect::RuntimeError("can't open $outFile for writing");

  local $/ = '</threading>';
  while ( my $rec = <$in> ) {
    next if $rec !~ m/threading/;  # make sure that we have valid prospect thread

    # Take the capture from the match itself.  Reading $1 after an
    # unchecked match would silently use an undefined or stale value.
    my ($t) = $rec =~ m#template="(\w+)"#;
    if ( ! defined $t ) {
      throw Bio::Prospect::RuntimeError
        ( 'Unable to retrieve svm sort',
          "threading record has no template attribute" );
    }
    if ( ! defined $svm{$t} or $svm{$t} eq '') {
      throw Bio::Prospect::RuntimeError
        ( 'Unable to retrieve svm sort',
          "no svm score for template=$t" );
    }
    $rec =~ s#(<rawScore>.*?</rawScore>)#$1\n<svmScore>$svm{$t}</svmScore>#g;
    print {$out} $rec;
  }
  close($in);

  # buffered write errors only show up at close, so check it
  $out->close()
    or throw Bio::Prospect::RuntimeError("error writing $outFile: $!");

  return( $outFile );
}
406              
407              
408             #-------------------------------------------------------------------------------
409             # _sort1()
410             #-------------------------------------------------------------------------------
411              
412             =head2 _sort1()
413              
414             Name: _sort1()
415             Purpose: run sortProspect on threading file
416             Arguments: prospect xml file
417             Returns: filename of sortProspect results
418              
419             =cut
420              
# _sort1( $xfn ) -- run sortProspect on a prospect xml file.
#
# sortProspect's standard output is redirected to "$xfn.sort" and its
# standard error is discarded.
#
# Returns: the name of the sort file.
# Throws:  Bio::Prospect::RuntimeError if the command can't be started, is
#          killed by a signal, or exits with a non-zero status.
sub _sort1($$) {
  my ($self,$xfn) = @_;
  my $sfn = "$xfn.sort";
  my $cmd = "sortProspect $xfn 2>/dev/null 1>$sfn";
  print(STDERR "about to $cmd\n") if $ENV{DEBUG};

  if ( system($cmd) != 0 ) {
    my ($s, $err) = ($?, "$!");

    # -1 means the command could not be started at all.  It must be tested
    # before the signal bits, because -1 & 127 would read as "signal 127".
    if ($s == -1) {
      throw Bio::Prospect::RuntimeError
        ( 'failed to execute sortProspect',
          "system($cmd) could not be started: $err",
          'check your prospect installation manually' );
    }
    if ($s & 127) {
      $s &= 127;
      my $sn = Bio::Prospect::utilities::signame($s);
      throw Bio::Prospect::RuntimeError
        ( 'failed to execute sortProspect',
          "received signal $s ($sn)" );
    }
    $s >>= 8;
    throw Bio::Prospect::RuntimeError
      ( 'failed to execute sortProspect',
        "system($cmd) exited with status $s",
        'check your prospect installation manually' );
  }

  # sanity checks on the sort output??
  return $sfn;
}
446              
447              
# _setenv() -- copy the Prospect locations configured in
# Bio::Prospect::Init into the environment.
#
# $ENV{PROSPECT_PATH} and $ENV{PDB_PATH} are set from
# $Bio::Prospect::Init::PROSPECT_PATH and $Bio::Prospect::Init::PDB_PATH.
# Throws Bio::Prospect::Exception if either value is undefined or is not an
# existing directory; PROSPECT_PATH is checked (and exported) first.
sub _setenv {
  my $prospect_path = $Bio::Prospect::Init::PROSPECT_PATH;
  if ( not( defined $prospect_path and -d $prospect_path ) ) {
    my $shown = defined $prospect_path ? $prospect_path : '';
    throw Bio::Prospect::Exception
      ( "PROSPECT_PATH is not set correctly",
        "PROSPECT_PATH ($shown) is not a valid directory",
        "Check your prospect installation and set PROSPECT_PATH in Bio::Prospect::Init or as an environment variable" );
  }
  $ENV{'PROSPECT_PATH'} = $prospect_path;

  my $pdb_path = $Bio::Prospect::Init::PDB_PATH;
  if ( not( defined $pdb_path and -d $pdb_path ) ) {
    my $shown = defined $pdb_path ? $pdb_path : '';
    throw Bio::Prospect::Exception
      ( "PDB_PATH is not set correctly",
        "PDB_PATH ($shown) is not a valid directory",
        "Check your prospect installation and set PDB_PATH in Bio::Prospect::Init or as an environment variable" );
  }
  $ENV{'PDB_PATH'} = $pdb_path;
}
466              
467              
sub _prepare_options($$) {
  my $self = shift;
  my $opts = $self->{options};

  # Scalar::Util is a core module.  blessed() plus isa() accepts subclasses
  # of Bio::Prospect::Options as well as the class itself.
  require Scalar::Util;
  ( Scalar::Util::blessed($opts) and $opts->isa('Bio::Prospect::Options') )
    || throw Bio::Prospect::BadUsage('Bio::Prospect::Options argument is missing');

  my @cl = ( "$Bio::Prospect::Init::PROSPECT_PATH/bin/prospect" );

  # Input type.  Whatever is chosen here must end up in $cl[1], because
  # _thread1() fills in the input filename there with sprintf.
  # NOTE(review): the two throws below use class Exception::NotYetSupported,
  # unlike the Bio::Prospect::* classes used elsewhere in this file --
  # confirm that class is actually defined.
  if (exists $opts->{phd}) {
    throw Exception::NotYetSupported
      ( "phd threading isn't implemented" );
  } elsif (exists $opts->{ssp}) {
    throw Exception::NotYetSupported
      ( "ssp threading isn't implemented" );
  } elsif (exists $opts->{seq}) {
    push( @cl, '-seqfile %s' );
  } else {
    throw Bio::Prospect::BadUsage("Bio::Prospect::Options doesn't specify input type");
  }

  # Output file; must stay in $cl[2] for the same reason.
  push(@cl, '-o %s');
  push(@cl, '-ncpus '.($opts->{ncpus}||2) );
  push(@cl, '-freqfile',$opts->{freqfile} ) if ( exists $opts->{freqfile} );
  push(@cl, '-reliab') if $opts->{zscore};
  push(@cl, '-3d') if $opts->{'3d'};
  push(@cl, $opts->{global_local} ? '-global_local' : '-global');

  # template set selection
  # ONE of -scop, -tfile, -templates (array), or -fssp (default)
  if ($opts->{scop}) {
    push(@cl, '-scop')
  } elsif (exists $opts->{tfile}) {
    push(@cl, '-tfile', $opts->{tfile})
  } elsif (exists $opts->{templates}) {
    # an explicit template list is written to a temporary file, one
    # template per line, and passed as a -tfile
    my ($fh,$fn) = $self->_tempfile('lst');
    $fh->print(join("\n",@{$opts->{templates}}),"\n");
    $fh->close();
    push(@cl, '-tfile', $fn);
  } else {
    push(@cl, '-fssp');
  }

  # stderr is discarded unless DEBUG is above 5; stdout is sent to stderr
  push(@cl, '2> /dev/null' ) unless (defined $ENV{DEBUG} and $ENV{DEBUG}>5);
  push(@cl, '1>&2');

  @{$self->{commandline}} = @cl;
  return @cl;

=pod

=over

=item B<::_prepare_options()>

Prepares temporary files based on options (e.g., writes a temporary
`tfile') and generates an array of command line options in
@{$self->{commandline}}.  Elements 1 and 2 of that array are templates for
the input and output filenames respectively and MUST be sprintf'd before
use.  See _thread1().

=back

=cut
}
531              
# _write_seqfile( $seq ) -- write a sequence to a temporary fasta file.
#
# All whitespace is removed from the sequence, which is then written under
# the header ">LocalClient /len=N" (N = length after whitespace removal),
# wrapped at 60 columns.  The file comes from $self->_tempfile('fa').
#
# Returns: the temporary file's name.
# Throws:  Bio::Prospect::Exception if $seq is undefined.
sub _write_seqfile($$)
{
  my ($self,$seq) = @_;
  # Bio::Prospect::Exception is the generic exception class used elsewhere
  # in this file (the bare class name "Exception" was used here before).
  throw Bio::Prospect::Exception('seq undefined') unless defined $seq;

  my ($fh,$fn) = $self->_tempfile('fa');
  $seq =~ s/\s//g;
  my $len = length($seq);

  # Wrap at 60 cols.  The lookahead only breaks where more sequence follows,
  # so a length that is a multiple of 60 doesn't leave a trailing blank line.
  $seq =~ s/(.{60})(?=.)/$1\n/g;

  $fh->print( ">LocalClient /len=$len\n$seq\n");
  $fh->close();
  return $fn;
}
544              
545              
546              
547              
548             =pod
549              
550             =head1 SEE ALSO
551              
552             B<Bio::Prospect::Options>, B<Bio::Prospect::File>,
553             B<Bio::Prospect::Client>, B<Bio::Prospect::SoapClient>,
554             B<Bio::Prospect::Thread>, B<Bio::Prospect::ThreadSummary>
555              
556             http://www.bioinformaticssolutions.com/
557              
558             =cut
559              
560              
561             1;