File Coverage

Bio/Search/Result/HmmpfamResult.pm
Criterion Covered Total %
statement 126 130 96.9
branch 29 44 65.9
condition 10 20 50.0
subroutine 14 15 93.3
pod 5 5 100.0
total 184 214 85.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Result::HmmpfamResult
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Result::HmmpfamResult - A parser and result object for hmmpfam
17             results
18              
19             =head1 SYNOPSIS
20              
21             # generally we use Bio::SearchIO to build these objects
22             use Bio::SearchIO;
23             my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
24             -file => 'result.hmmer');
25              
26             while (my $result = $in->next_result) {
27             print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
28             }
29              
30             =head1 DESCRIPTION
31              
32             This object implements a parser for hmmpfam result output, a program in the HMMER
33             package.
34              
35             =head1 FEEDBACK
36              
37             =head2 Mailing Lists
38              
39             User feedback is an integral part of the evolution of this and other
40             Bioperl modules. Send your comments and suggestions preferably to
41             the Bioperl mailing list. Your participation is much appreciated.
42              
43             bioperl-l@bioperl.org - General discussion
44             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45              
46             =head2 Support
47              
48             Please direct usage questions or support issues to the mailing list:
49              
50             I
51              
52             rather than to the module maintainer directly. Many experienced and
53             reponsive experts will be able look at the problem and quickly
54             address it. Please include a thorough description of the problem
55             with code and data examples if at all possible.
56              
57             =head2 Reporting Bugs
58              
59             Report bugs to the Bioperl bug tracking system to help us keep track
60             of the bugs and their resolution. Bug reports can be submitted via the
61             web:
62              
63             https://github.com/bioperl/bioperl-live/issues
64              
65             =head1 AUTHOR - Sendu Bala
66              
67             Email bix@sendu.me.uk
68              
69             =head1 APPENDIX
70              
71             The rest of the documentation details each of the object methods.
72             Internal methods are usually preceded with a _
73              
74             =cut
75              
76             # Let the code begin...
77              
78             package Bio::Search::Result::HmmpfamResult;
79              
80 1     1   3 use strict;
  1         1  
  1         23  
81              
82 1     1   413 use Bio::Search::Hit::HmmpfamHit;
  1         2  
  1         29  
83              
84 1     1   4 use base qw(Bio::Root::Root Bio::Search::Result::PullResultI);
  1         1  
  1         464  
85              
86             =head2 new
87              
88             Title : new
89             Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new();
90             Function: Builds a new Bio::SearchIO::Result::hmmpfam object
91             Returns : Bio::SearchIO::Result::hmmpfam
92             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
93             -parent => Bio::PullParserI object (required if no -chunk)
94             -parameters => hash ref of search parameters (key => value), optional
95             -statistics => hash ref of search statistics (key => value), optional
96              
97             where the array ref provided to -chunk contains an IO object
98             for a filehandle to something representing the raw data of the
99             result, and $start and $end define the tell() position within the
100             filehandle that the result data starts and ends (optional; defaults
101             to start and end of the entire thing described by the filehandle)
102              
103             =cut
104              
105             sub new {
106 3     3 1 6 my ($class, @args) = @_;
107 3         11 my $self = $class->SUPER::new(@args);
108            
109 3         10 $self->_setup(@args);
110            
111 3         7 foreach my $field (qw( header hit_table hsp_table alignments next_model models query_length )) {
112 21         23 $self->_fields->{$field} = undef;
113             }
114            
115 3         20 $self->_dependencies( { ( query_name => 'header',
116             query_accession => 'header',
117             query_description => 'header',
118             hit_table => 'header',
119             num_hits => 'hit_table',
120             no_hits_found => 'hit_table',
121             hsp_table => 'hit_table',
122             next_alignment => 'hsp_table' ) } );
123            
124 3         8 return $self;
125             }
126              
127             #
128             # PullParserI discovery methods so we can answer all ResultI questions
129             #
130              
131             sub _discover_header {
132 3     3   5 my $self = shift;
133 3         9 $self->_chunk_seek(0);
134 3         9 my $header = $self->_get_chunk_by_end("all domains):\n");
135 3         5 $self->{_after_header} = $self->_chunk_tell;
136            
137 3 50       7 $header || $self->throw("Could not find hmmer header, is file really hmmer format?");
138            
139 3         19 ($self->_fields->{query_name}) = $header =~ /^Query(?:\s+sequence)?:\s+(\S+)/m;
140 3         9 ($self->_fields->{query_accession}) = $header =~ /^Accession:\s+(\S+)/m;
141 3         10 ($self->_fields->{query_description}) = $header =~ /^Description:\s+(\S.+)/m;
142 3   100     5 $self->_fields->{query_accession} ||= '';
143 3   100     5 $self->_fields->{query_description} ||= '';
144            
145 3         4 $self->_fields->{header} = 1; # stop this method being called again
146             }
147              
148             sub _discover_hit_table {
149 3     3   5 my $self = shift;
150            
151 3         8 $self->_chunk_seek($self->{_after_header});
152 3         9 my $table = $self->_get_chunk_by_end("for domains:\n");
153 3         6 $self->{_after_hit_table} = $self->_chunk_tell;
154            
155 3         8 my $evalue_cutoff = $self->get_field('evalue_cutoff');
156 3 50       9 undef $evalue_cutoff if $evalue_cutoff eq '[unset]';
157 3         5 my $score_cutoff = $self->get_field('score_cutoff');
158 3 50       7 undef $score_cutoff if $score_cutoff eq '[unset]';
159 3         14 my $hsps_cutoff = $self->get_field('hsps_cutoff');
160 3 50       7 undef $hsps_cutoff if $hsps_cutoff eq '[unset]';
161            
162 3         1 my @table;
163 3         3 my $no_hit = 1;
164 3         621 while ($table =~ /^(\S+)\s+(\S.+?)?\s+(\S+)\s+(\S+)\s+(\d+)\n/gm) {
165 57         34 $no_hit = 0;
166 57         76 my $evalue = abs($4); # consistency for tests under Windows
167 57 50 33     71 next if ($evalue_cutoff && $evalue > $evalue_cutoff);
168 57 50 33     69 next if ($score_cutoff && $3 < $score_cutoff);
169 57 50 33     75 next if ($hsps_cutoff && $5 < $hsps_cutoff);
170 57         579 push(@table, [$1, $2, $3, $evalue, $5]);
171             }
172 3         8 $self->_fields->{hit_table} = \@table;
173 3 50       8 $self->{_next_hit_index} = @table > 0 ? 0 : -1;
174            
175 3         6 $self->_fields->{no_hits_found} = $no_hit;
176 3         5 $self->_fields->{num_hits} = @table;
177             }
178              
179             sub _discover_hsp_table {
180 3     3   4 my $self = shift;
181            
182 3         8 $self->_chunk_seek($self->{_after_hit_table});
183 3         6 my $table = $self->_get_chunk_by_end("top-scoring domains:\n");
184 3   33     9 $table ||= $self->_get_chunk_by_end("//\n"); # A0 reports
185 3         7 $self->{_after_hsp_table} = $self->_chunk_tell;
186            
187 3         3 my %table;
188             # can't save this regex work for the hsp object because the hit object needs
189             # its length, so may as well just do all the work here
190 3         32 while ($table =~ /^(\S+)\s+(\d+)\/\d+\s+(\d+)\s+(\d+)\s+\S\S\s+(\d+)\s+(\d+)\s+\S(\S)\s+(\S+)\s+(\S+)/gm) {
191             # rank query_start query_end hit_start hit_end score evalue
192 59         77 my $evalue = abs($9); # consistency for tests under Windows
193 59         30 push(@{$table{$1}->{hsp_data}}, [$2, $3, $4, $5, $6, $8, $evalue]);
  59         252  
194 59 100       164 if ($7 eq ']') {
195 34         154 $table{$1}->{hit_length} = $6;
196             }
197             }
198 3         8 $self->_fields->{hsp_table} = \%table;
199             }
200              
201             sub _discover_alignments {
202 3     3   4 my $self = shift;
203 3         5 $self->_fields->{alignments} = { };
204             }
205              
206             sub _next_alignment {
207 26     26   23 my $self = shift;;
208 26 100       34 return if $self->{_no_more_alignments};
209            
210 25         34 my $aligns = $self->_fields->{alignments};
211            
212 25 100       33 unless (defined $self->{_after_previous_alignment}) {
213 3         8 $self->_chunk_seek($self->{_after_hsp_table});
214 3         7 my $chunk = $self->_get_chunk_by_end(": domain");
215 3 100       8 unless ($chunk) {
216 1         2 $self->{_no_more_alignments} = 1;
217 1         4 return;
218             }
219            
220 2         4 $self->{_after_previous_alignment} = $self->_chunk_tell;
221 2         4 $self->{_next_alignment_start_text} = $chunk;
222 2         9 return $self->_next_alignment;
223             }
224            
225 22         32 $self->_chunk_seek($self->{_after_previous_alignment});
226 22         32 my $chunk = $self->_get_chunk_by_end(": domain");
227 22 100       28 unless ($chunk) {
228 1         2 $self->_chunk_seek($self->{_after_previous_alignment});
229 1         1 $chunk = $self->_get_chunk_by_end("//");
230            
231 1 50       3 unless ($chunk) {
232 0         0 $self->{_no_more_alignments} = 1;
233 0         0 return;
234             }
235             }
236            
237 22         28 $self->{_after_previous_alignment} = $self->_chunk_tell;
238            
239 22 50       29 if (defined $self->{_next_alignment_start_text}) {
240 22         60 $chunk = $self->{_next_alignment_start_text}.$chunk;
241             }
242            
243 22         220 $chunk =~ s/(\S+: domain)$//;
244 22         35 $self->{_next_alignment_start_text} = $1;
245            
246 22         54 my ($name, $domain) = $chunk =~ /^(\S+): domain (\d+)/;
247 22         49 $aligns->{$name.'~~~~'.$domain} = $chunk;
248 22         70 return 1;
249             }
250              
251             sub _discover_next_hit {
252 21     21   18 my $self = shift;
253 21         20 my @hit_table = @{$self->get_field('hit_table')};
  21         33  
254 21 100       41 return if $self->{_next_hit_index} == -1;
255            
256             #[name description score significance num_hsps rank]
257 16         16 my @hit_data = (@{$hit_table[$self->{_next_hit_index}++]}, $self->{_next_hit_index});
  16         38  
258            
259 16         68 $self->_fields->{next_hit} = Bio::Search::Hit::HmmpfamHit->new(-parent => $self,
260             -hit_data => \@hit_data);
261            
262 16 100       50 if ($self->{_next_hit_index} > $#hit_table) {
263 5         10 $self->{_next_hit_index} = -1;
264             }
265             }
266              
267             =head2 next_hit
268              
269             Title : next_hit
270             Usage : while( $hit = $result->next_hit()) { ... }
271             Function: Returns the next available Hit object, representing potential
272             matches between the query and various entities from the database.
273             Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
274             Args : none
275              
276             =cut
277              
278             sub next_hit {
279 21     21 1 371 my $self = shift;
280 21         35 my $hit = $self->get_field('next_hit');
281 21         28 undef $self->_fields->{next_hit};
282 21         42 return $hit;
283             }
284              
285             =head2 next_model
286              
287             Title : next_model
288             Usage : my $domain = $result->next_model
289             Function: Returns the next domain - this is an alias for next_hit()
290             Returns : L object
291             Args : none
292              
293             =cut
294              
295             *next_model = \&next_hit;
296              
297             =head2 hits
298              
299             Title : hits
300             Usage : my @hits = $result->hits
301             Function: Returns the HitI objects contained within this Result
302             Returns : Array of Bio::Search::Hit::HitI objects
303             Args : none
304              
305             See Also: L
306              
307             =cut
308              
309             sub hits {
310 3     3 1 6 my $self = shift;
311 3   50     12 my $old = $self->{_next_hit_index} || 0;
312 3         6 $self->rewind;
313 3         3 my @hits;
314 3         5 while (defined(my $hit = $self->next_hit)) {
315 5         9 push(@hits, $hit);
316             }
317 3 50       6 $self->{_next_hit_index} = @hits > 0 ? $old : -1;
318 3         8 return @hits;
319             }
320              
321             =head2 models
322              
323             Title : models
324             Usage : my @domains = $result->models;
325             Function: Returns the list of HMM models seen - this is an alias for hits()
326             Returns : Array of L objects
327             Args : none
328              
329             =cut
330              
331             *models = \&hits;
332              
333             =head2 sort_hits
334              
335             Title : sort_hits
336             Usage : $result->sort_hits('
337             Function : Sorts the hits so that they come out in the desired order when
338             hits() or next_hit() is called.
339             Returns : n/a
340             Args : A coderef for the sort function. See the documentation on the Perl
341             sort() function for guidelines on writing sort functions.
342             You will be sorting array references, not HitI objects. The
343             references contain name as element 0, description as element 1,
344             score as element 2, significance as element 3 and number of hsps
345             as element 4.
346             By default the sort order is ascending significance value (ie.
347             most significant hits first).
348             Note : To access the special variables $a and $b used by the Perl sort()
349             function the user function must access
350             Bio::Search::Result::HmmpfamResult namespace.
351             For example, use :
352             $result->sort_hits(
353             sub{$Bio::Search::Result::HmmpfamResult::a->[2]
354             <=>
355             $Bio::Search::Result::HmmpfamResult::b->[2]});
356             NOT $result->sort_hits($a->[2] <=> $b->[2]);
357              
358             =cut
359              
360             sub sort_hits {
361 2     2 1 2 my ($self, $code_ref) = @_;
362 2   50 0   6 $code_ref ||= sub { $a->[3] <=> $b->[3] };
  0         0  
363            
364             # avoid creating hit objects just to sort, hence force user to sort on
365             # the array references in hit table
366 2         4 my $table_ref = $self->get_field('hit_table');
367 2 50       3 @{$table_ref} > 1 || return;
  2         5  
368            
369 2         1 my @sorted = sort $code_ref @{$table_ref};
  2         7  
370 2 50       11 @sorted == @{$table_ref} || $self->throw("Your sort routine failed to give back all hits!");
  2         4  
371 2         3 $self->_fields->{hit_table} = \@sorted;
372             }
373              
374             =head2 rewind
375              
376             Title : rewind
377             Usage : $result->rewind;
378             Function: Allow one to reset the Hit iterator to the beginning, so that
379             next_hit() will subsequently return the first hit and so on.
380             Returns : n/a
381             Args : none
382              
383             =cut
384              
385             sub rewind {
386 3     3 1 4 my $self = shift;
387 3 50       6 unless ($self->_fields->{hit_table}) {
388 0         0 $self->get_field('hit_table');
389             }
390 3 50       3 $self->{_next_hit_index} = @{$self->_fields->{hit_table}} > 0 ? 0 : -1;
  3         5  
391             }
392              
393             1;