File Coverage

Bio/SearchIO/hmmer_pull.pm
Criterion Covered Total %
statement 50 60 83.3
branch 9 24 37.5
condition 6 10 60.0
subroutine 9 10 90.0
pod 3 3 100.0
total 77 107 71.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SearchIO::hmmer_pull
3             #
4             # Please direct questions and support issues to <bioperl-l@bioperl.org>
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SearchIO::hmmer_pull - A parser for HMMER output
17              
18             =head1 SYNOPSIS
19              
20             # do not use this class directly it is available through Bio::SearchIO
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23             -file => 't/data/hmmpfam.bigout');
24             while (my $result = $in->next_result) {
25             # this is a Bio::Search::Result::HmmpfamResult object
26             print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
27             while (my $hit = $result->next_hit) {
28             print $hit->name(), "\n";
29             while (my $hsp = $hit->next_hsp) {
30             print "length is ", $hsp->length(), "\n";
31             }
32             }
33             }
34              
35             =head1 DESCRIPTION
36              
37             This object implements a pull-parser for HMMER output. It is fast since it
38             only does work on request (hence 'pull').
39              
40             =head1 FEEDBACK
41              
42             =head2 Mailing Lists
43              
44             User feedback is an integral part of the evolution of this and other
45             Bioperl modules. Send your comments and suggestions preferably to
46             the Bioperl mailing list. Your participation is much appreciated.
47              
48             bioperl-l@bioperl.org - General discussion
49             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50              
51             =head2 Support
52              
53             Please direct usage questions or support issues to the mailing list:
54              
55             I<bioperl-l@bioperl.org>
56              
57             rather than to the module maintainer directly. Many experienced and
58             responsive experts will be able to look at the problem and quickly
59             address it. Please include a thorough description of the problem
60             with code and data examples if at all possible.
61              
62             =head2 Reporting Bugs
63              
64             Report bugs to the Bioperl bug tracking system to help us keep track
65             of the bugs and their resolution. Bug reports can be submitted via the
66             web:
67              
68             https://github.com/bioperl/bioperl-live/issues
69              
70             =head1 AUTHOR - Sendu Bala
71              
72             Email bix@sendu.me.uk
73              
74             =head1 APPENDIX
75              
76             The rest of the documentation details each of the object methods.
77             Internal methods are usually preceded with a _
78              
79             =cut
80              
81             # Let the code begin...
82              
83             package Bio::SearchIO::hmmer_pull;
84              
85 1     1   3 use strict;
  1         1  
  1         28  
86              
87              
88 1     1   3 use base qw(Bio::SearchIO Bio::PullParserI);
  1         2  
  1         492  
89              
90             =head2 new
91              
92             Title : new
93             Usage : my $obj = Bio::SearchIO::hmmer_pull->new();
94             Function: Builds a new Bio::SearchIO::hmmer_pull object
95             Returns : Bio::SearchIO::hmmer_pull
96             Args : -fh/-file => HMMER output filename
97             -format => 'hmmer_pull'
98             -evalue => float or scientific notation number to be used
99             as an evalue cutoff for hits
100             -score => integer or scientific notation number to be used
101             as a score value cutoff for hits
102             -hsps => integer minimum number of hsps (domains) a hit must have
103             -piped_behaviour => 'temp_file'|'memory'|'sequential_read'
104              
105             -piped_behaviour defines what the parser should do if the input is
106             an unseekable filehandle (eg. piped input), see
107             Bio::PullParserI::chunk for details. Default is 'sequential_read'.
108              
109             =cut
110              
111             sub _initialize {
                # Set up parser state from the constructor's named arguments
                # (-writer, -file, -fh, -piped_behaviour, -evalue, -score, -hsps).
                # Seeds the field table with placeholders, records the optional
                # cutoffs, registers which fields are obtained via 'header', and
                # finally hands the input (-file or -fh) to chunk().
                # Throws if neither -file nor -fh was supplied.
112 2     2   8 my ($self, @args) = @_;
113            
114             # don't do normal SearchIO initialization
115            
116 2         15 my ($writer, $file, $fh, $piped_behaviour, $evalue, $score, $hsps) =
117             $self->_rearrange([qw(WRITER
118             FILE FH
119             PIPED_BEHAVIOUR
120             EVALUE
121             SCORE
122             HSPS)], @args);
123 2 50       10 $self->writer($writer) if $writer;
124            
                # Every field starts undefined (or '' / '[unset]'); real values are
                # filled in later by the _discover_* methods in this file.
125 2         42 $self->_fields( { ( header => undef,
126             algorithm => undef,
127             algorithm_version => undef,
128             algorithm_reference => '',
129             hmm_file => undef,
130             hmm_name => undef,
131             sequence_file => undef,
132             sequence_database => undef,
133             database_name => undef,
134             database_letters => undef,
135             database_entries => undef,
136             next_result => undef,
137             evalue_cutoff => '[unset]',
138             score_cutoff => '[unset]',
139             hsps_cutoff => '[unset]' ) } );
140            
                # NOTE(review): these are truthiness tests, so a cutoff of 0 (or '0')
                # is ignored and the field stays '[unset]' -- confirm that is intended.
141 2 50       6 $self->_fields->{evalue_cutoff} = $evalue if $evalue;
142 2 50       4 $self->_fields->{score_cutoff} = $score if $score;
143 2 50       5 $self->_fields->{hsps_cutoff} = $hsps if $hsps;
144            
                # Maps each listed field to 'header'; presumably this tells the
                # pull-parser machinery to run the header discovery when one of them
                # is requested -- confirm against Bio::PullParserI.
145 2         20 $self->_dependencies( { ( algorithm => 'header',
146             algorithm_version => 'header',
147             hmm_file => 'header',
148             hmm_name => 'header',
149             sequence_file => 'header',
150             sequence_database => 'header' ) } );
151            
                # -file takes precedence over -fh; -piped_behaviour defaults to
                # 'sequential_read' when not given.
152 2   33     26 $self->chunk($file || $fh || $self->throw("-file or -fh must be supplied"),
      50        
153             -piped_behaviour => $piped_behaviour || 'sequential_read');
154             }
155              
156             sub _discover_header {
                # Read the start of the report and extract the header fields:
                # algorithm, algorithm_version, hmm_file/hmm_name and
                # sequence_file/sequence_database. Also remembers the position just
                # after the header in $self->{_after_header}.
157 2     2   4 my $self = shift;
158 2         22 $self->_chunk_seek(0);
                # presumably "nol" = number of lines, i.e. the first 8 lines of the
                # report -- confirm against Bio::PullParserI
159 2         13 my $header = $self->_get_chunk_by_nol(8);
160 2         4 $self->{_after_header} = $self->_chunk_tell;
161            
                # The program name is the "hmm..." word that starts a line and is
                # followed by " - search"; it is stored upper-cased.
                # NOTE(review): if the pattern does not match, $algo is undef here.
162 2         14 my ($algo) = $header =~ /^(hmm\S+) - search/m;
163 2         6 $self->_fields->{algorithm} = uc $algo;
164            
165 2         12 ($self->_fields->{algorithm_version}) = $header =~ /^HMMER\s+?(\S+)/m;
166            
                # Captures the last whitespace-free token on the "HMM file:" line.
                # The lazy .+? must still consume at least one character after the
                # first whitespace, so this relies on the value being separated from
                # the label by more than one whitespace character.
167 2         14 ($self->_fields->{hmm_file}) = $header =~ /^HMM file:\s.+?(\S+)$/m;
168 2         5 $self->_fields->{hmm_name} = $self->_fields->{hmm_file};
169            
                # Same capture for the "Sequence file:" / "Sequence database:" line.
170 2         16 ($self->_fields->{sequence_file}) = $header =~ /^Sequence (?:file|database):\s.+?(\S+)$/m;
171 2         5 $self->_fields->{sequence_database} = $self->_fields->{sequence_file};
172            
                # Mark the header itself as discovered.
173 2         5 $self->_fields->{header} = 1;
174             }
175              
176             sub _discover_database_name {
                # Set the database_name field according to the report type:
                # for HMMPFAM it is the HMM file, for HMMSEARCH the sequence file.
                # For any other algorithm value the field is left untouched.
177 1     1   2 my $self = shift;
178 1         2 my $type = $self->get_field('algorithm');
179            
180 1 50       4 if ($type eq 'HMMPFAM') {
    0          
181 1         3 $self->_fields->{database_name} = $self->get_field('hmm_file');
182             }
183             elsif ($type eq 'HMMSEARCH') {
184 0         0 $self->_fields->{database_name} = $self->get_field('sequence_file');
185             }
186             }
187              
188             sub _discover_next_result {
                # Locate the next result section (terminated by "//\n") and store a
                # Bio::Search::Result::HmmpfamResult for it in the next_result field.
                # Returns early, leaving next_result unset, when no further section
                # is found. Only HMMPFAM reports are supported: HMMSEARCH and any
                # other algorithm value cause a throw.
189 4     4   6 my $self = shift;
190 4         10 my $type = $self->get_field('algorithm'); # also sets _after_header if not set
191            
192 4 50       12 if ($type eq 'HMMPFAM') {
    0          
                # 'use' is processed at compile time, so this module is loaded
                # regardless of which branch runs.
193 1     1   428 use Bio::Search::Result::HmmpfamResult;
  1         3  
  1         395  
194            
195 4 50       10 unless ($self->_sequential) {
                # Seekable input: resume where the previous result ended, or just
                # after the header for the first result (or after rewind()).
196 4   66     23 $self->_chunk_seek($self->{_end_of_previous_result} || $self->{_after_header});
197            
198 4         21 my ($start, $end) = $self->_find_chunk_by_end("//\n");
                # An empty span is treated as "no more results".
199 4 100       14 return if $start == $end;
                # The result is given the chunk source plus start/end offsets rather
                # than the text itself.
200 3         12 $self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [($self->chunk, $start, $end)],
201             -parent => $self);
202            
203 3         10 $self->{_end_of_previous_result} = $end;
204             }
205             else {
206             # deliberately don't cache these, which means rewind won't work;
207             # if we cached we may as well have used 'memory' option to
208             # -piped_behaviour
209 0         0 my $chunk = $self->_get_chunk_by_end("//\n");
210 0 0       0 $chunk || return;
211 0         0 $self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [$chunk],
212             -parent => $self);
213             }
214             }
215             elsif ($type eq 'HMMSEARCH') {
216 0         0 $self->throw("Can't handle hmmsearch yet\n");
217             }
218             else {
219 0         0 $self->throw("Unknown report type");
220             }
221             }
222              
223             =head2 next_result
224              
225             Title : next_result
226             Usage : my $result = $searchio->next_result;
227             Function: Returns the next Result from a search
228             Returns : Bio::Search::Result::ResultI object
229             Args : none
230              
231             =cut
232              
233             sub next_result {
                # Return the next result object, or nothing when get_field() yields
                # no (true) next_result. Each successful call bumps _result_count.
234 4     4 1 18 my $self = shift;
235 4   100     17 my $result = $self->get_field('next_result') || return;
236            
                # Clear the field so the following call does not hand back the same
                # object; presumably get_field() then re-runs _discover_next_result
                # for an undefined field -- confirm against Bio::PullParserI.
237 3         8 undef $self->_fields->{next_result};
238            
239 3         6 $self->{'_result_count'}++;
240 3         8 return $result;
241             }
242              
243             =head2 result_count
244              
245             Title : result_count
246             Usage : my $count = $searchio->result_count
247             Function: Returns the number of results we have processed.
248             Returns : integer
249             Args : none
250              
251             =cut
252              
253             sub result_count {
                # Return the counter that next_result() increments on every result it
                # hands out.
                # NOTE(review): nothing in this file initialises _result_count, so
                # this appears to return undef (not 0) before the first result --
                # confirm whether a parent class sets it.
254 1     1 1 9 my $self = shift;
255 1         6 return $self->{'_result_count'};
256             }
257              
258             =head2 rewind
259              
260             Title : rewind
261             Usage : $searchio->rewind;
262             Function: Allow one to reset the Result iterator to the beginning, so that
263             next_result() will subsequently return the first result and so on.
264              
265             NB: result objects are not cached, so you will get new result objects
266             each time you rewind. Also, note that result_count() counts the
267             number of times you have called next_result(), so will not be able
268             to tell you how many results there were in the file if you use rewind().
269              
270             Returns : n/a
271             Args : none
272              
273             =cut
274              
275             sub rewind {
                # Reset result iteration. Forgetting _end_of_previous_result makes the
                # next non-sequential _discover_next_result seek to _after_header
                # again. In sequential mode a warning is issued; the delete below
                # still runs, since the warning branch does not return.
276 0     0 1   my $self = shift;
277 0 0         if ($self->_sequential) {
278 0           $self->warn("rewind has no effect on piped input when you have chosen 'sequential_read' mode");
279             }
280 0           delete $self->{_end_of_previous_result};
281             }
282              
283             1;