File Coverage

Bio/Search/Hit/BlastPullHit.pm
Criterion Covered Total %
statement 88 92 95.6
branch 22 24 91.6
condition 21 29 72.4
subroutine 12 14 85.7
pod 6 6 100.0
total 149 165 90.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Hit::BlastPullHit
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Hit::BlastPullHit - A parser and hit object for BLASTN hits
17              
18             =head1 SYNOPSIS
19              
20             # generally we use Bio::SearchIO to build these objects
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'blast_pull',
23             -file => 'result.blast');
24              
25             while (my $result = $in->next_result) {
26             while (my $hit = $result->next_hit) {
27             print $hit->name, "\n";
28             print $hit->score, "\n";
29             print $hit->significance, "\n";
30              
31             while (my $hsp = $hit->next_hsp) {
32             # process HSPI objects
33             }
34             }
35             }
36              
37             =head1 DESCRIPTION
38              
39             This object implements a parser for BLASTN hit output.
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to
47             the Bioperl mailing list. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             of the bugs and their resolution. Bug reports can be submitted via the
67             web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Sendu Bala
72              
73             Email bix@sendu.me.uk
74              
75             =head1 CONTRIBUTORS
76              
77             Additional contributors names and emails here
78              
79             =head1 APPENDIX
80              
81             The rest of the documentation details each of the object methods.
82             Internal methods are usually preceded with a _
83              
84             =cut
85              
86             # Let the code begin...
87              
88             package Bio::Search::Hit::BlastPullHit;
89              
90 1     1   3 use strict;
  1         1  
  1         21  
91              
92 1     1   445 use Bio::Search::HSP::BlastPullHSP;
  1         3  
  1         36  
93              
94 1     1   6 use base qw(Bio::Root::Root Bio::Search::Hit::PullHitI);
  1         1  
  1         597  
95              
96             =head2 new
97              
98             Title : new
99             Usage : my $obj = Bio::Search::Hit::BlastNHit->new();
100             Function: Builds a new Bio::Search::Hit::BlastNHit object.
101             Returns : Bio::Search::Hit::BlastNHit
102             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
103             -parent => Bio::PullParserI object (required if no -chunk)
104             -hit_data => array ref with [name description score significance]
105              
106             where the array ref provided to -chunk contains an IO object
107             for a filehandle to something representing the raw data of the
108             hit, and $start and $end define the tell() position within the
109             filehandle that the hit data starts and ends (optional; defaults
110             to start and end of the entire thing described by the filehandle)
111              
112             =cut
113              
114             sub new {
115 39     39 1 83 my ($class, @args) = @_;
116 39         94 my $self = $class->SUPER::new(@args);
117            
118 39         102 $self->_setup(@args);
119            
120 39         63 my $fields = $self->_fields;
121 39         57 foreach my $field (qw( header start_end )) {
122 78         110 $fields->{$field} = undef;
123             }
124            
125 39         68 my $hit_data = $self->_raw_hit_data;
126 39 50 33     161 if ($hit_data && ref($hit_data) eq 'ARRAY') {
127 39         55 foreach my $field (qw(name description score significance)) {
128 156         107 $fields->{$field} = shift(@{$hit_data});
  156         230  
129             }
130             }
131            
132 39         229 $self->_dependencies( { ( name => 'header',
133             length => 'header',
134             description => 'header',
135             accession => 'header',
136             next_hsp => 'header',
137             query_start => 'start_end',
138             query_end => 'start_end',
139             hit_start => 'start_end',
140             hit_end => 'start_end' ) } );
141            
142 39         99 return $self;
143             }
144              
145             #
146             # PullParserI discovery methods so we can answer all HitI questions
147             #
148              
149             sub _discover_header {
150 20     20   18 my $self = shift;
151 20         44 $self->_chunk_seek(0);
152 20         47 my $header = $self->_get_chunk_by_end("\n Score = ");
153            
154 20 100       45 unless ($header) {
155             # no alignment or other data; all information was in the hit table of
156             # the result
157 3         10 $self->_calculate_accession_from_name;
158            
159 3         6 $self->_fields->{header} = 1;
160 3         4 return;
161             }
162            
163 17         32 $self->{_after_header} = $self->_chunk_tell;
164            
165 17         237 ($self->_fields->{name}, $self->_fields->{description}, $self->_fields->{length}) = $header =~ /^(\S+)\s+(\S.+?)?\s+Length\s*=\s*(\d+)/sm;
166 17 100       39 if ($self->_fields->{description}) {
167 15         27 $self->_fields->{description} =~ s/\n//g;
168             }
169             else {
170 2         7 $self->_fields->{description} = '';
171             }
172            
173 17         40 $self->_calculate_accession_from_name;
174            
175 17         31 $self->_fields->{header} = 1;
176             }
177              
178             sub _calculate_accession_from_name {
179 20     20   17 my $self = shift;
180 20         40 my $name = $self->get_field('name');
181 20 100       102 if ($name =~ /.+?\|.+?\|.+?\|(\w+)/) {
    100          
182 9         17 $self->_fields->{accession} = $1;
183             }
184             elsif ($self->_fields->{name} =~ /.+?\|(\w+)?\./) {
185             # old form?
186 6         10 $self->_fields->{accession} = $1;
187             }
188             else {
189 5         10 $self->_fields->{accession} = $name;
190             }
191             }
192              
193             sub _discover_start_end {
194 4     4   5 my $self = shift;
195            
196 4         5 my ($q_start, $q_end, $h_start, $h_end);
197 4         9 foreach my $hsp ($self->hsps) {
198 9         22 my ($this_q_start, $this_h_start) = $hsp->start;
199 9         23 my ($this_q_end, $this_h_end) = $hsp->end;
200            
201 9 100 100     31 if (! defined $q_start || $this_q_start < $q_start) {
202 6         6 $q_start = $this_q_start;
203             }
204 9 100 100     27 if (! defined $h_start || $this_h_start < $h_start) {
205 6         7 $h_start = $this_h_start;
206             }
207            
208 9 100 100     25 if (! defined $q_end || $this_q_end > $q_end) {
209 7         7 $q_end = $this_q_end;
210             }
211 9 100 100     28 if (! defined $h_end || $this_h_end > $h_end) {
212 5         9 $h_end = $this_h_end;
213             }
214             }
215            
216 4         10 $self->_fields->{query_start} = $q_start;
217 4         9 $self->_fields->{query_end} = $q_end;
218 4         8 $self->_fields->{hit_start} = $h_start;
219 4         9 $self->_fields->{hit_end} = $h_end;
220             }
221              
222             sub _discover_next_hsp {
223 177     177   146 my $self = shift;
224 177   66     370 my $pos = $self->{_end_of_previous_hsp} || $self->{_after_header};
225 177 50       265 return unless $pos;
226 177         287 $self->_chunk_seek($pos);
227            
228 177         322 my ($start, $end) = $self->_find_chunk_by_end("\n Score = ");
229 177 100 66     520 if ((defined $end && ($end + $self->_chunk_true_start) > $self->_chunk_true_end) || ! $end) {
      66        
230 45   66     81 $start = $self->{_end_of_previous_hsp} || $self->{_after_header};
231 45         73 $end = $self->_chunk_true_end;
232             }
233             else {
234 132         203 $end += $self->_chunk_true_start;
235             }
236 177         232 $start += $self->_chunk_true_start;
237            
238 177 100       302 return if $start >= $self->_chunk_true_end;
239            
240 145         210 $self->{_end_of_previous_hsp} = $end - $self->_chunk_true_start;
241            
242             #*** needs to inherit piped_behaviour, and we need to deal with _sequential
243             # ourselves
244 145         263 $self->_fields->{next_hsp} = Bio::Search::HSP::BlastPullHSP->new(-parent => $self,
245             -chunk => [$self->chunk, $start, $end]);
246             }
247              
248             sub _discover_num_hsps {
249 4     4   6 my $self = shift;
250 4         9 $self->_fields->{num_hsps} = $self->hsps;
251             }
252              
253             =head2 next_hsp
254              
255             Title : next_hsp
256             Usage : while( $hsp = $obj->next_hsp()) { ... }
257             Function : Returns the next available High Scoring Pair
258             Example :
259             Returns : L object or null if finished
260             Args : none
261              
262             =cut
263              
264             sub next_hsp {
265 177     177 1 153 my $self = shift;
266 177         327 my $hsp = $self->get_field('next_hsp');
267 177         298 undef $self->_fields->{next_hsp};
268 177         347 return $hsp;
269             }
270              
271             =head2 hsps
272              
273             Usage : $hit_object->hsps();
274             Purpose : Get a list containing all HSP objects.
275             Example : @hsps = $hit_object->hsps();
276             Returns : list of L objects.
277             Argument : none
278              
279             =cut
280              
281             sub hsps {
282 28     28 1 35 my $self = shift;
283 28         35 my $old = $self->{_end_of_previous_hsp};
284 28         73 $self->rewind;
285 28         33 my @hsps;
286 28         54 while (defined(my $hsp = $self->next_hsp)) {
287 140         247 push(@hsps, $hsp);
288             }
289 28         41 $self->{_end_of_previous_hsp} = $old;
290 28         84 return @hsps;
291             }
292              
293             =head2 hsp
294              
295             Usage : $hit_object->hsp( [string] );
296             Purpose : Get a single HSPI object for the present HitI object.
297             Example : $hspObj = $hit_object->hsp; # same as 'best'
298             : $hspObj = $hit_object->hsp('best');
299             : $hspObj = $hit_object->hsp('worst');
300             Returns : Object reference for a L object.
301             Argument : String (or no argument).
302             : No argument (default) = highest scoring HSP (same as 'best').
303             : 'best' = highest scoring HSP.
304             : 'worst' = lowest scoring HSP.
305             Throws : Exception if an unrecognized argument is used.
306              
307             See Also : L, L()
308              
309             =cut
310              
311             sub hsp {
312 0     0 1 0 my ($self, $type) = @_;
313 0   0     0 $type ||= 'best';
314 0         0 $self->throw_not_implemented;
315             }
316              
317             =head2 rewind
318              
319             Title : rewind
320             Usage : $result->rewind;
321             Function: Allow one to reset the HSP iterator to the beginning, so that
322             next_hsp() will subsequently return the first hsp and so on.
323             Returns : n/a
324             Args : none
325              
326             =cut
327              
328             sub rewind {
329 28     28 1 31 my $self = shift;
330 28         49 delete $self->{_end_of_previous_hsp};
331             }
332              
333             # have p() a synonym of significance()
334             sub p {
335 0     0 1   return shift->significance;
336             }
337              
338             1;