File Coverage

Bio/Search/Result/BlastPullResult.pm
Criterion Covered Total %
statement 132 137 96.3
branch 62 74 83.7
condition 12 21 57.1
subroutine 13 14 92.8
pod 7 7 100.0
total 226 253 89.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Result::BlastPullResult
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Result::BlastPullResult - A parser and result object for BLASTN
17             results
18              
19             =head1 SYNOPSIS
20              
21             # generally we use Bio::SearchIO to build these objects
22             use Bio::SearchIO;
23             my $in = Bio::SearchIO->new(-format => 'blast_pull',
24             -file => 'result.blast');
25              
26             while (my $result = $in->next_result) {
27             print $result->query_name, " ", $result->algorithm, " ", $result->num_hits(), " hits\n";
28             }
29              
30             =head1 DESCRIPTION
31              
32             This object implements a parser for NCBI BLASTN result output.
33              
34             =head1 FEEDBACK
35              
36             =head2 Mailing Lists
37              
38             User feedback is an integral part of the evolution of this and other
39             Bioperl modules. Send your comments and suggestions preferably to
40             the Bioperl mailing list. Your participation is much appreciated.
41              
42             bioperl-l@bioperl.org - General discussion
43             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44              
45             =head2 Support
46              
47             Please direct usage questions or support issues to the mailing list:
48              
49             I
50              
51             rather than to the module maintainer directly. Many experienced and
52             reponsive experts will be able look at the problem and quickly
53             address it. Please include a thorough description of the problem
54             with code and data examples if at all possible.
55              
56             =head2 Reporting Bugs
57              
58             Report bugs to the Bioperl bug tracking system to help us keep track
59             of the bugs and their resolution. Bug reports can be submitted via the
60             web:
61              
62             https://github.com/bioperl/bioperl-live/issues
63              
64             =head1 AUTHOR - Sendu Bala
65              
66             Email bix@sendu.me.uk
67              
68             =head1 CONTRIBUTORS
69              
70             Additional contributors names and emails here
71              
72             =head1 APPENDIX
73              
74             The rest of the documentation details each of the object methods.
75             Internal methods are usually preceded with a _
76              
77             =cut
78              
79             # Let the code begin...
80              
81             package Bio::Search::Result::BlastPullResult;
82              
83 1     1   3 use strict;
  1         1  
  1         21  
84              
85 1     1   429 use Bio::Search::Hit::BlastPullHit;
  1         3  
  1         31  
86              
87 1     1   5 use base qw(Bio::Root::Root Bio::Search::Result::PullResultI);
  1         1  
  1         523  
88              
89             =head2 new
90              
91             Title : new
92             Usage : my $obj = Bio::SearchIO::Result::hmmpfam->new();
93             Function: Builds a new Bio::SearchIO::Result::hmmpfam object
94             Returns : Bio::SearchIO::Result::hmmpfam
95             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
96             -parent => Bio::PullParserI object (required if no -chunk)
97             -parameters => hash ref of search parameters (key => value), optional
98             -statistics => hash ref of search statistics (key => value), optional
99              
100             where the array ref provided to -chunk contains an IO object
101             for a filehandle to something representing the raw data of the
102             result, and $start and $end define the tell() position within the
103             filehandle that the result data starts and ends (optional; defaults
104             to start and end of the entire thing described by the filehandle)
105              
106             =cut
107              
108             sub new {
109 17     17 1 30 my ($class, @args) = @_;
110 17         46 my $self = $class->SUPER::new(@args);
111            
112 17         53 $self->_setup(@args);
113            
114 17         26 foreach my $field (qw( header hit_table hsp_table alignments next_model
115             models query_length stats_and_params)) {
116 136         145 $self->_fields->{$field} = undef;
117             }
118            
119 17         78 $self->_dependencies( { ( query_name => 'header',
120             query_accession => 'header',
121             query_description => 'header',
122             query_length => 'header',
123             hit_table => 'header',
124             num_hits => 'hit_table',
125             no_hits_found => 'hit_table' ) } );
126            
127 17         43 return $self;
128             }
129              
130             #
131             # PullParserI discovery methods so we can answer all ResultI questions
132             #
133              
134             sub _discover_header {
135 17     17   18 my $self = shift;
136 17         35 $self->_chunk_seek(0);
137 17         32 my $header = $self->_get_chunk_by_end("Value\n");
138 17 50       33 if (!$header) {
139 0         0 $header = $self->_get_chunk_by_end("***** No hits found ******\n");
140 0         0 $self->{_no_hits_found} = 1;
141             }
142 17 50       29 $self->throw("Invalid header returned") unless $header;
143 17         36 $self->{_after_header} = $self->_chunk_tell;
144            
145 17         74 ($self->_fields->{query_name}) = $header =~ /^\s*(\S+)/;
146 17         36 $self->_fields->{query_accession} = '';
147 17         34 $self->_fields->{query_description} = '';
148            
149 17 100       97 if ($header =~ /^Length=(\d+)/m) {
    100          
150 5         13 $self->_fields->{query_length} = $1;
151             }
152             elsif ($header =~ /\((\d+) letters\)/) {
153             # older form?
154 11         25 $self->_fields->{query_length} = $1;
155            
156 11 100       45 if ($header =~ /^\s*\(\d+ letters/) {
157             # there wasn't a query sequence name
158 1         4 $self->_fields->{query_name} = '';
159             }
160             }
161            
162 17         27 $self->_fields->{header} = 1;
163             }
164              
165             sub _discover_hit_table {
166 13     13   22 my $self = shift;
167 13         28 $self->_chunk_seek($self->{_after_header});
168            
169 13         29 my $table = $self->_get_chunk_by_end("\n>");
170            
171 13 50 66     36 if (!$table && !$self->{_no_hits_found}) {
172             # no alignments, presumably; hit table comprises the remainder of this
173             # result
174 2         7 while (my $line = $self->_get_chunk_by_nol(1)) {
175 22         51 $table .= $line;
176             }
177             }
178            
179 13   50     22 $table ||= '';
180            
181 13         27 $self->{_after_hit_table} = $self->_chunk_tell;
182            
183 13         34 my $evalue_cutoff = $self->get_field('evalue_cutoff');
184 13 50       32 undef $evalue_cutoff if $evalue_cutoff eq '[unset]';
185 13         27 my $score_cutoff = $self->get_field('score_cutoff');
186 13 50       25 undef $score_cutoff if $score_cutoff eq '[unset]';
187            
188 13         15 my @table;
189 13         14 my $no_hit = 1;
190 13         179 while ($table =~ /^(\S+)\s+(\S.*?)?\s+(\S+)\s+([\de]\S*)\s*\n/gm) {
191 93         74 $no_hit = 0;
192 93         173 my ($name, $desc, $score, $evalue) = ($1, $2, $3, $4);
193 93   100     110 $desc ||= '';
194 93 50       115 if ($evalue =~ /^e/) {
195 0         0 $evalue = '1'.$evalue;
196             }
197 93 50 33     134 next if ($evalue_cutoff && $evalue > $evalue_cutoff);
198 93 50 33     126 next if ($score_cutoff && $score < $score_cutoff);
199 93         768 push(@table, [$name, $desc, $score, $evalue]);
200             }
201 13         30 $self->_fields->{hit_table} = \@table;
202 13 50       35 $self->{_next_hit_index} = @table > 0 ? 0 : -1;
203            
204 13         26 $self->_fields->{no_hits_found} = $no_hit;
205 13         23 $self->_fields->{num_hits} = @table;
206             }
207              
208             sub _discover_next_hit {
209 41     41   37 my $self = shift;
210 41         80 my $hit_table = $self->get_field('hit_table');
211 41 100       81 return if $self->{_next_hit_index} == -1;
212 39         40 my $hit_row = ${$hit_table}[$self->{_next_hit_index}];
  39         65  
213            
214 39   66     115 $self->_chunk_seek($self->{_end_of_previous_hit} || $self->{_after_hit_table});
215            
216 39         77 my ($start, $end) = $self->_find_chunk_by_end("\n>");
217 39 100       76 unless ($end) {
218 21   66     33 $start = $self->{_end_of_previous_hit} || $self->{_after_hit_table};
219 21         36 $end = $self->_chunk_true_end;
220             }
221             else {
222 18         32 $end += $self->_chunk_true_start;
223             }
224 39         54 $start += $self->_chunk_true_start;
225            
226 39         53 $self->{_end_of_previous_hit} = $end - $self->_chunk_true_start;
227            
228             #*** needs to inherit piped_behaviour, and we need to deal with _sequential
229             # ourselves
230 39         80 $self->_fields->{next_hit} = Bio::Search::Hit::BlastPullHit->new(-parent => $self,
231             -chunk => [$self->chunk, $start, $end],
232             -hit_data => $hit_row);
233            
234 39         64 $self->{_next_hit_index}++;
235            
236 39 100       37 if ($self->{_next_hit_index} > $#{$hit_table}) {
  39         88  
237 9         18 $self->{_next_hit_index} = -1;
238             }
239             }
240              
241             sub _discover_stats_and_params {
242 4     4   7 my $self = shift;
243 4         10 $self->_chunk_seek(0);
244 4         12 my ($start, $end) = $self->_find_chunk_by_end("\n Database: ");
245 4         9 $self->_chunk_seek($end);
246            
247 4         4 my $gapped = 0;
248 4         12 while ($self->_get_chunk_by_nol(1)) {
249 135 100       1059 if (/Number of letters in database:\s+(\S+)/) {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
250 4         11 my $stat = $1;
251 4         11 $stat =~ s/,//g;
252 4         22 $self->add_statistic('dbletters', $stat);
253             }
254             elsif (/Number of sequences in database:\s+(\S+)/) {
255 4         9 my $stat = $1;
256 4         10 $stat =~ s/,//g;
257 4         8 $self->add_statistic('dbentries', $stat);
258             }
259             elsif (/^Lambda/) {
260 8         12 my $line = $self->_get_chunk_by_nol(1);
261 8         28 $line =~ /\s+(\S+)\s+(\S+)\s+(\S+)/;
262 8 100       22 $self->add_statistic($gapped ? 'lambda_gapped' : 'lambda', $1);
263 8 100       18 $self->add_statistic($gapped ? 'kappa_gapped' : 'kappa', $2);
264 8 100       17 $self->add_statistic($gapped ? 'entropy_gapped' : 'entropy', $3);
265 8         14 $gapped = 1;
266             }
267             elsif (/^Matrix: (.+)\n/) {
268 4         15 $self->add_parameter('matrix', $1);
269             }
270             elsif (/^Gap Penalties: Existence: (\d+), Extension: (\d+)/) {
271 4         10 $self->add_parameter('gapopen', $1);
272 4         6 $self->add_parameter('gapext', $2);
273             }
274             elsif (/^Number of Hits to DB: (\d+)/) {
275 4         15 $self->add_statistic('Hits_to_DB', $1);
276             }
277             elsif (/^Number of extensions: (\d+)/) {
278 4         9 $self->add_statistic('num_extensions', $1);
279             }
280             elsif (/^Number of successful extensions: (\d+)/) {
281 4         13 $self->add_statistic('num_successful_extensions', $1);
282             }
283             elsif (/^Number of sequences better than (\S+):/) {
284 4         10 $self->add_parameter('expect', $1);
285             }
286             elsif (/^[Ll]ength of query: (\d+)/) {
287 4         12 $self->add_statistic('querylength', $1);
288             }
289             elsif (/^[Ee]ffective HSP length: (\d+)/) {
290 2         5 $self->add_statistic('effective_hsplength', $1);
291             }
292             elsif (/^[Ee]ffective length of database: (\d+)/) {
293 4         9 $self->add_statistic('effectivedblength', $1);
294             }
295             elsif (/^[Ee]ffective search space: (\d+)/) {
296 4         11 $self->add_statistic('effectivespace', $1);
297             }
298             elsif (/^[Ee]ffective search space used: (\d+)/) {
299 4         7 $self->add_statistic('effectivespaceused', $1);
300             }
301             elsif (/^([TAXS]\d?): (\d+)(?: \((\S+))?/) {
302 25         39 $self->add_statistic($1, $2);
303 25 100       70 $self->add_statistic($1.'_bits', $3) if $3;
304             }
305             }
306            
307 4         9 $self->_fields->{stats_and_params} = 1;
308             }
309              
310             =head2 next_hit
311              
312             Title : next_hit
313             Usage : while( $hit = $result->next_hit()) { ... }
314             Function: Returns the next available Hit object, representing potential
315             matches between the query and various entities from the database.
316             Returns : a Bio::Search::Hit::HitI object or undef if there are no more.
317             Args : none
318              
319             =cut
320              
321             sub next_hit {
322 41     41 1 391 my $self = shift;
323 41         82 my $hit = $self->get_field('next_hit');
324 41         53 undef $self->_fields->{next_hit};
325 41         81 return $hit;
326             }
327              
328             =head2 hits
329              
330             Title : hits
331             Usage : my @hits = $result->hits
332             Function: Returns the HitI objects contained within this Result
333             Returns : Array of Bio::Search::Hit::HitI objects
334             Args : none
335              
336             See Also: L
337              
338             =cut
339              
340             sub hits {
341 2     2 1 5 my $self = shift;
342 2   50     10 my $old = $self->{_next_hit_index} || 0;
343 2         7 $self->rewind;
344 2         2 my @hits;
345 2         6 while (defined(my $hit = $self->next_hit)) {
346 20         30 push(@hits, $hit);
347             }
348 2 50       5 $self->{_next_hit_index} = @hits > 0 ? $old : -1;
349 2         9 return @hits;
350             }
351              
352             =head2 sort_hits
353              
354             Title : sort_hits
355             Usage : $result->sort_hits('
356             Function : Sorts the hits so that they come out in the desired order when
357             hits() or next_hit() is called.
358             Returns : n/a
359             Args : A coderef for the sort function. See the documentation on the Perl
360             sort() function for guidelines on writing sort functions.
361             By default the sort order is ascending significance value (ie.
362             most significant hits first).
363             *** example
364              
365             =cut
366              
367             sub sort_hits {
368 0     0 1 0 my ($self, $code_ref) = @_;
369 0         0 $self->throw("Not implemented yet");
370             }
371              
372             =head2 rewind
373              
374             Title : rewind
375             Usage : $result->rewind;
376             Function: Allow one to reset the Hit iterator to the beginning, so that
377             next_hit() will subsequently return the first hit and so on.
378             Returns : n/a
379             Args : none
380              
381             =cut
382              
383             sub rewind {
384 2     2 1 3 my $self = shift;
385 2 50       5 unless ($self->_fields->{hit_table}) {
386 2         6 $self->get_field('hit_table');
387             }
388 2 50       3 $self->{_next_hit_index} = @{$self->_fields->{hit_table}} > 0 ? 0 : -1;
  2         6  
389             }
390              
391             =head2 get_statistic
392              
393             Title : get_statistic
394             Usage : my $gap_ext = $result->get_statistic('kappa')
395             Function: Returns the value for a specific statistic available
396             from this result
397             Returns : string
398             Args : name of statistic (string)
399              
400             =cut
401              
402             sub get_statistic {
403 52     52 1 66 my $self = shift;
404 52         109 $self->get_field('stats_and_params');
405 52         124 return $self->SUPER::get_statistic(@_);
406             }
407              
408             =head2 get_parameter
409              
410             Title : get_parameter
411             Usage : my $gap_ext = $result->get_parameter('gapext')
412             Function: Returns the value for a specific parameter used
413             when running this result
414             Returns : string
415             Args : name of parameter (string)
416              
417             =cut
418              
419             sub get_parameter {
420 15     15 1 25 my $self = shift;
421 15         35 $self->get_field('stats_and_params');
422 15         39 return $self->SUPER::get_parameter(@_);
423             }
424              
425             1;