File Coverage

blib/lib/Bio/Tools/Run/Hmmer.pm
Criterion Covered Total %
statement 30 164 18.2
branch 5 86 5.8
condition 1 20 5.0
subroutine 8 20 40.0
pod 10 10 100.0
total 54 300 18.0


line stmt bran cond sub pod time code
1             # You may distribute this module under the same terms as perl itself
2             # POD documentation - main docs before the code
3              
4             =head1 NAME
5              
6             Bio::Tools::Run::Hmmer - Wrapper for local execution of hmmalign, hmmbuild,
7             hmmcalibrate, hmmemit, hmmpfam, hmmsearch
8              
9             =head1 SYNOPSIS
10              
11             # run hmmsearch (similar for hmmpfam)
12             my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm');
13              
14             # Pass the factory a Bio::Seq object or a file name, returns a Bio::SearchIO
15             my $searchio = $factory->hmmsearch($seq);
16              
17             while (my $result = $searchio->next_result){
18             while(my $hit = $result->next_hit){
19             while (my $hsp = $hit->next_hsp){
20             print join("\t", ( $result->query_name,
21             $hsp->query->start,
22             $hsp->query->end,
23             $hit->name,
24             $hsp->hit->start,
25             $hsp->hit->end,
26             $hsp->score,
27             $hsp->evalue,
28             $hsp->seq_str,
29             )), "\n";
30             }
31             }
32             }
33              
34             # build a hmm using hmmbuild
35             my $aio = Bio::AlignIO->new(-file => "protein.msf", -format => 'msf');
36             my $aln = $aio->next_aln;
37             my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm');
38             $factory->hmmbuild($aln);
39              
40             # calibrate the hmm
41             $factory->calibrate();
42              
43             # emit a sequence stream from the hmm
44             my $seqio = $factory->hmmemit();
45              
46             # align sequences to the hmm
47             my $alnio = $factory->hmmalign(@seqs);
48              
49             =head1 DESCRIPTION
50              
51             Wrapper module for Sean Eddy's HMMER suite of program to allow running of
52             hmmalign, hmmbuild, hmmcalibrate, hmmemit, hmmpfam and hmmsearch. Binaries are
53             available at http://hmmer.janelia.org/
54              
55             You can pass most options understood by the command-line programs to new(), or
56             set the options by calling methods with the same name as the argument. In both
57             instances, case sensitivity matters.
58              
59             Additional methods are hmm() to specifiy the hmm file (needed for all HMMER
60             programs) which you would normally set in the call to new().
61              
62             The HMMER programs must either be in your path, or you must set the environment
63             variable HMMERDIR to point to their location.
64              
65             =head1 FEEDBACK
66              
67             =head2 Mailing Lists
68              
69             User feedback is an integral part of the evolution of this and other
70             Bioperl modules. Send your comments and suggestions preferably to one
71             of the Bioperl mailing lists. Your participation is much appreciated.
72              
73             bioperl-l@bioperl.org - General discussion
74             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
75              
76             =head2 Support
77              
78             Please direct usage questions or support issues to the mailing list:
79              
80             I
81              
82             rather than to the module maintainer directly. Many experienced and
83             reponsive experts will be able look at the problem and quickly
84             address it. Please include a thorough description of the problem
85             with code and data examples if at all possible.
86              
87             =head2 Reporting Bugs
88              
89             Report bugs to the Bioperl bug tracking system to help us keep track
90             the bugs and their resolution. Bug reports can be submitted via the
91             web:
92              
93             http://redmine.open-bio.org/projects/bioperl/
94              
95             =head1 AUTHOR - Shawn Hoon
96              
97             Email: shawnh-at-gmx.net
98              
99             =head1 CONTRIBUTORS
100              
101             Shawn Hoon shawnh-at-gmx.net
102             Jason Stajich jason -at- bioperl -dot- org
103             Scott Markel scott -at- scitegic -dot com
104             Sendu Bala bix@sendu.me.uk
105              
106             =head1 APPENDIX
107              
108             The rest of the documentation details each of the object
109             methods. Internal methods are usually preceded with a _
110              
111             =cut
112              
113             package Bio::Tools::Run::Hmmer;
114              
115 1     1   102170 use strict;
  1         4  
  1         30  
116              
117 1     1   412 use Bio::SeqIO;
  1         39918  
  1         30  
118 1     1   289 use Bio::SearchIO;
  1         7240  
  1         27  
119 1     1   293 use Bio::AlignIO;
  1         51332  
  1         32  
120              
121 1     1   8 use base qw(Bio::Tools::Run::WrapperBase);
  1         2  
  1         392  
122              
123             our $DefaultFormat = 'msf';
124             our $DefaultReadMethod = 'hmmer';
125             our %ALL = (quiet => 'q', o => 'outfile');
126             our @ALIGN_PARAMS = qw(mapali outformat withali o);
127             our @ALIGN_SWITCHES = qw(m oneline q);
128             our @BUILD_PARAMS = qw(n archpri cfile gapmax idlevel null pam pamwgt
129             pbswitch prior swentry swexit o);
130             our @BUILD_SWITCHES = qw(f g s A F amino binary fast hand noeff nucleic
131             wblosum wgsc wme wnone wpb wvoronoi);
132             our @CALIBRATE_PARAMS = qw(fixed histfile mean num sd seed cpu);
133             our @CALIBRATE_SWITCHES = qw();
134             our @EMIT_PARAMS = qw(n seed o);
135             our @EMIT_SWITCHES = qw(c q);
136             our @PFAM_PARAMS = qw(A E T Z domE domT informat cpu);
137             our @PFAM_SWITCHES = qw(n acc cut_ga cut_gc cut_nc forward null2 xnu);
138             our @SEARCH_PARAMS = @PFAM_PARAMS;
139             our @SEARCH_SWITCHES = @PFAM_SWITCHES;
140             our %OTHER = (_READMETHOD => '_readmethod',
141             program_name => [qw(PROGRAM program)],
142             hmm => [qw(HMM db DB)]);
143              
144             # just to be explicit
145             our @UNSUPPORTED = qw(h verbose a compat pvm);
146              
147              
148             =head2 new
149              
150             Title : new
151             Usage : $HMMER->new(@params)
152             Function: Creates a new HMMER factory
153             Returns : Bio::Tools::Run::HMMER
154             Args : -hmm => filename # the hmm, used by all program types; if not set
155             # here, must be set with hmm() method prior to
156             # running anything
157             -_READMETHOD => 'hmmer' (default) || 'hmmer_pull' # the parsing
158             # module to use for
159             # hmmpfam/hmmsearch
160              
161             Any option supported by a Hmmer program, where switches are given
162             a true value, eg. -q => 1, EXCEPT for the following which are handled
163             internally/ incompatible: h verbose a compat pvm
164              
165             WARNING: the default sequence format passed to hmmpfam is msf. If
166             you are using a different format, you need to pass it with informat.
167             e.g.
168             my $factory = Bio::Tools::Run::Hmmer->new(-hmm => 'model.hmm',
169             -informat => 'fasta');
170              
171             -q is synonymous with -quiet
172             -o is synonymous with -outfile
173              
174             # may be specified here, allowing run() to be used, or
175             # it can be omitted and the corresponding method (eg.
176             # hmmalign()) used later.
177             -program => hmmalign|hmmbuild|hmmcalibrate|hmmemit|hmmpfam|hmmsearch
178              
179             =cut
180              
181             sub new {
182 1     1 1 113 my($class, @args) = @_;
183 1         11 my $self = $class->SUPER::new(@args);
184            
185 2         7 $self->_set_from_args(\@args, -methods => {(map { $_ => $ALL{$_} } keys %ALL),
186 3         10 (map { $_ => $OTHER{$_} } keys %OTHER),
187 1         45 (map { $_ => $_ }
  81         137  
188             (@ALIGN_PARAMS,
189             @ALIGN_SWITCHES,
190             @BUILD_PARAMS,
191             @BUILD_SWITCHES,
192             @CALIBRATE_PARAMS,
193             @CALIBRATE_SWITCHES,
194             @EMIT_PARAMS,
195             @EMIT_SWITCHES,
196             @PFAM_PARAMS,
197             @PFAM_SWITCHES,
198             @SEARCH_PARAMS,
199             @SEARCH_SWITCHES))},
200             -create => 1,
201             -case_sensitive => 1);
202            
203 1 50       59 $self->informat || $self->informat($DefaultFormat);
204 1 50       49 $self->_READMETHOD || $self->_READMETHOD($DefaultReadMethod);
205            
206 1         31 return $self;
207             }
208              
209             =head2 run
210              
211             Title : run
212             Usage : $obj->run($seqFile)
213             Function: Runs one of the Hmmer programs, according to the current setting of
214             program() (as typically set during new(-program => 'name')).
215             Returns : A Bio::SearchIO, Bio::AlignIO, Bio::SeqIO or boolean depending on
216             the program being run (see method corresponding to program name for
217             details).
218             Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
219              
220             =cut
221              
222             sub run {
223 0     0 1 0 my $self = shift;
224 0   0     0 my $program = lc($self->program_name || $self->throw("The program must already be specified"));
225 0 0       0 $self->can($program) || $self->throw("'$program' wasn't a valid program");
226 0         0 return $self->$program(@_);
227             }
228              
229             =head2 hmmalign
230              
231             Title : hmmalign
232             Usage : $obj->hmmalign()
233             Function: Runs hmmalign
234             Returns : A Bio::AlignIO
235             Args : list of Bio::SeqI OR Bio::Align::AlignI OR filename of file with
236             sequences or an alignment
237              
238             =cut
239              
240             sub hmmalign {
241 0     0 1 0 my $self = shift;
242 0         0 $self->program_name('hmmalign');
243 0         0 my $input = $self->_setinput(@_);
244            
245 0 0       0 unless (defined $self->o()) {
246 0         0 $self->q(1);
247             }
248 0 0       0 if (! $self->outformat) {
249 0         0 $self->outformat($DefaultFormat);
250             }
251            
252 0         0 return $self->_run($input);
253             }
254              
255             =head2 hmmbuild
256              
257             Title : hmmbuild
258             Usage : $obj->hmmbuild()
259             Function: Runs hmmbuild, outputting an hmm to the file currently set by method
260             hmm() or db(), or failing that, o() or outfile(), or failing that, to
261             a temp location.
262             Returns : true on success
263             Args : Bio::Align::AlignI OR filename of file with an alignment
264              
265             =cut
266              
267             sub hmmbuild {
268 0     0 1 0 my $self = shift;
269 0         0 $self->program_name('hmmbuild');
270 0         0 my $input = $self->_setinput(@_);
271            
272 0 0       0 unless (defined $self->hmm()) {
273 0   0     0 $self->hmm($self->o() || $self->io->tempfile(-dir => $self->tempdir));
274             }
275            
276 0         0 return $self->_run($input);
277             }
278              
279             =head2 hmmcalibrate
280              
281             Title : hmmcalibrate
282             Usage : $obj->hmmcalibrate()
283             Function: Runs hmmcalibrate
284             Returns : true on success
285             Args : none (hmm() must be set, most likely by the -hmm option of new()), OR
286             optionally supply an hmm filename to set hmm() and run
287              
288             =cut
289              
290             sub hmmcalibrate {
291 0     0 1 0 my ($self, $hmm) = @_;
292 0         0 $self->program_name('hmmcalibrate');
293 0 0       0 $self->hmm($hmm) if $hmm;
294 0 0       0 $self->hmm || $self->throw("hmm() must be set first");
295 0         0 return $self->_run();
296             }
297              
298             =head2 hmmemit
299              
300             Title : hmmemit
301             Usage : $obj->hmmemit()
302             Function: Runs hmmemit
303             Returns : A Bio::SeqIO
304             Args : none (hmm() must be set, most likely by the -hmm option of new()), OR
305             optionally supply an hmm filename to set hmm() and run
306              
307             =cut
308              
309             sub hmmemit {
310 0     0 1 0 my ($self, $hmm) = @_;
311 0         0 $self->program_name('hmmemit');
312 0 0       0 $self->hmm($hmm) if $hmm;
313 0 0       0 $self->hmm || $self->throw("hmm() must be set first");
314            
315 0 0       0 unless (defined $self->o()) {
316 0         0 $self->q(1);
317             }
318            
319 0         0 return $self->_run();
320             }
321              
322             =head2 hmmpfam
323              
324             Title : hmmpfam
325             Usage : $obj->hmmpfam()
326             Function: Runs hmmpfam
327             Returns : A Bio::SearchIO
328             Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
329              
330             =cut
331              
332             sub hmmpfam {
333 0     0 1 0 my $self = shift;
334 0         0 $self->program_name('hmmpfam');
335 0         0 my $input = $self->_setinput(@_);
336 0         0 return $self->_run($input);
337             }
338              
339             =head2 hmmsearch
340              
341             Title : hmmsearch
342             Usage : $obj->hmmsearch()
343             Function: Runs hmmsearch
344             Returns : A Bio::SearchIO
345             Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
346              
347             =cut
348              
349             sub hmmsearch {
350 0     0 1 0 my $self = shift;
351 0         0 $self->program_name('hmmsearch');
352 0         0 my $input = $self->_setinput(@_);
353 0         0 return $self->_run($input);
354             }
355              
356             =head2 _setinput
357              
358             Title : _setinput
359             Usage : $obj->_setinput()
360             Function: Internal(not to be used directly)
361             Returns : filename
362             Args : A Bio::PrimarySeqI, Bio::Align::AlignI or filename
363              
364             =cut
365              
366             sub _setinput {
367 0     0   0 my ($self, @things) = @_;
368 0 0       0 @things || $self->throw("At least one input is required");
369            
370 0         0 my $infile;
371 0 0 0     0 if (ref $things[0] && $things[0]->isa("Bio::PrimarySeqI") ){# it is an object
    0 0        
    0          
372 0         0 $infile = $self->_writeSeqFile(@things);
373             }
374             elsif(ref $things[0] && $things[0]->isa("Bio::Align::AlignI")){
375 0         0 $infile = $self->_writeAlignFile(@things);
376             }
377             elsif (-e $things[0]) {
378 0         0 $infile = $things[0];
379             }
380             else {
381 0         0 $self->throw("Unknown kind of input '@things'");
382             }
383            
384 0         0 return $infile;
385             }
386              
387             =head2 _run
388              
389             Title : _run
390             Usage : $obj->_run()
391             Function: Internal(not to be used directly)
392             Returns : Bio::SearchIO
393             Args : file name
394              
395             =cut
396              
397             sub _run {
398 0     0   0 my ($self, $file) = @_;
399              
400             # Use double quotes if file path have empty spaces
401 0 0       0 if ($file =~ m/ /) {
402 0         0 $file = "\"$file\"";
403             }
404            
405 0         0 my $str = $self->executable;
406             # Use double quotes if executable path have empty spaces
407 0 0       0 if ($str =~ m/ /) {
408 0         0 $str = "\"$str\"";
409             }
410              
411 0         0 $str .= $self->_setparams;
412 0 0       0 $str .= ' '.$file if $file;
413 0         0 $self->debug("HMMER command = $str");
414            
415 0         0 my $progname = $self->program_name;
416            
417 0         0 my @in;
418 0         0 my @verbose = (-verbose => $self->verbose);
419 0 0       0 if ($progname =~ /align|build|emit/) {
    0          
420 0         0 my $outfile = $self->o;
421 0 0 0     0 if ($outfile || $progname eq 'hmmbuild') {
422 0 0       0 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
423 0 0       0 $str .= " > $null" if $self->quiet;
424            
425 0 0       0 if ($progname eq 'hmmbuild') {
426 0         0 my $status = system($str);
427 0 0       0 return $status ? 0 : 1;
428             }
429             else {
430 0 0       0 system($str) && $self->throw("HMMER call ($str) crashed: $?\n");
431 0         0 @in = (-file => $outfile);
432             }
433             }
434             else {
435 0 0       0 open(my $fh, "$str |") || $self->throw("HMMER call ($str) crashed: $?\n");
436 0         0 @in = (-fh => $fh);
437             }
438             }
439             elsif ($progname =~ /pfam|search/i) {
440 0 0       0 open(my $fh, "$str |") || $self->throw("HMMER call ($str) crashed: $?\n");
441            
442 0         0 return Bio::SearchIO->new(-fh => $fh,
443             @verbose,
444             -format => $self->_READMETHOD);
445             }
446            
447 0 0       0 if ($progname eq 'hmmalign') {
    0          
    0          
448 0         0 return Bio::AlignIO->new(@in,
449             @verbose,
450             -format => $self->outformat);
451             }
452             elsif ($progname eq 'hmmemit') {
453 0         0 return Bio::SeqIO->new(@in,
454             @verbose,
455             -format => 'fasta');
456             }
457             elsif ($progname =~ /calibrate/) {
458 0 0       0 my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
459 0 0       0 $str .= " > $null 2> $null" if $self->quiet;
460 0         0 my $status = system($str);
461 0 0       0 return $status ? 0 : 1;
462             }
463             }
464              
465             =head2 _setparams
466              
467             Title : _setparams
468             Usage : Internal function, not to be called directly
469             Function: creates a string of params to be used in the command string
470             Returns : string of params
471             Args : none
472              
473             =cut
474              
475             sub _setparams {
476 0     0   0 my $self = shift;
477            
478 0         0 my @execparams;
479             my @execswitches;
480 0         0 SWITCH: for ($self->program_name) {
481 0 0       0 /align/ && do { @execparams = @ALIGN_PARAMS;
  0         0  
482 0         0 @execswitches = @ALIGN_SWITCHES;
483 0         0 last SWITCH; };
484 0 0       0 /build/ && do { @execparams = @BUILD_PARAMS;
  0         0  
485 0         0 @execswitches = @BUILD_SWITCHES;
486 0         0 last SWITCH; };
487 0 0       0 /calibrate/ && do { @execparams = @CALIBRATE_PARAMS;
  0         0  
488 0         0 @execswitches = @CALIBRATE_SWITCHES;
489 0         0 last SWITCH; };
490 0 0       0 /emit/ && do { @execparams = @EMIT_PARAMS;
  0         0  
491 0         0 @execswitches = @EMIT_SWITCHES;
492 0         0 last SWITCH; };
493 0 0       0 /pfam/ && do { @execparams = @PFAM_PARAMS;
  0         0  
494 0         0 @execswitches = @PFAM_SWITCHES;
495 0         0 last SWITCH; };
496 0 0       0 /search/ && do { @execparams = @SEARCH_PARAMS;
  0         0  
497 0         0 @execswitches = @SEARCH_SWITCHES;
498 0         0 last SWITCH; };
499             }
500            
501 0         0 my $param_string = $self->SUPER::_setparams(-params => \@execparams,
502             -switches => \@execswitches,
503             -mixed_dash => 1);
504            
505 0   0     0 my $hmm = $self->hmm || $self->throw("Need to specify either HMM file or Database");
506             # Use double quotes if hmm path have empty spaces
507 0 0       0 if ($hmm =~ m/ /) {
508 0         0 $hmm = "\"$hmm\"";
509             }
510 0         0 $param_string .= ' '.$hmm;
511            
512 0         0 return $param_string;
513             }
514              
515             =head2 program_name
516              
517             Title : program_name
518             Usage : $factory>program_name()
519             Function: holds the program name
520             Returns : string
521             Args : none
522              
523             =cut
524              
525             sub program_name {
526 7     7 1 5232 my $self = shift;
527 7 100       14 if (@_) {
528 1         7 $self->{program_name} = shift;
529            
530             # hack so that when program_name changes, so does executable()
531 1         2 delete $self->{'_pathtoexe'};
532             }
533 7   50     37 return $self->{program_name} || '';
534             }
535              
536             =head2 program_dir
537              
538             Title : program_dir
539             Usage : $factory->program_dir(@params)
540             Function: returns the program directory, obtained from ENV variable.
541             Returns : string
542             Args : none
543              
544             =cut
545              
546             sub program_dir {
547 3 50   3 1 11 return $ENV{HMMERDIR} if $ENV{HMMERDIR};
548             }
549              
550             =head2 _writeSeqFile
551              
552             Title : _writeSeqFile
553             Usage : obj->_writeSeqFile($seq)
554             Function: Internal(not to be used directly)
555             Returns : filename
556             Args : list of Bio::SeqI
557              
558             =cut
559              
560             sub _writeSeqFile {
561 0     0     my ($self, @seq) = @_;
562 0           my ($tfh, $inputfile) = $self->io->tempfile(-dir=>$self->tempdir);
563 0           $self->informat('fasta');
564 0           my $out = Bio::SeqIO->new(-fh => $tfh , '-format' => 'fasta');
565 0           foreach my $s (@seq) {
566 0           $out->write_seq($s);
567             }
568 0           $out->close();
569 0           $out = undef;
570 0           close($tfh);
571 0           undef $tfh;
572 0           return $inputfile;
573             }
574              
575             =head2 _writeAlignFile
576              
577             Title : _writeAlignFile
578             Usage : obj->_writeAlignFile($seq)
579             Function: Internal(not to be used directly)
580             Returns : filename
581             Args : list of Bio::Align::AlignI
582              
583             =cut
584              
585             sub _writeAlignFile{
586 0     0     my ($self, @align) = @_;
587 0           my ($tfh, $inputfile) = $self->io->tempfile(-dir=>$self->tempdir);
588 0           my $out = Bio::AlignIO->new('-fh' => $tfh, '-format' => $self->informat);
589 0           foreach my $a (@align) {
590 0           $out->write_aln($a);
591             }
592 0           $out->close();
593 0           $out = undef;
594 0           close($tfh);
595 0           undef $tfh;
596 0           return $inputfile;
597             }
598              
599             1;