File Coverage

Bio/Search/HSP/BlastPullHSP.pm
Criterion Covered Total %
statement 145 156 92.9
branch 55 68 80.8
condition 19 27 70.3
subroutine 12 13 92.3
pod 8 8 100.0
total 239 272 87.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::HSP::BlastPullHSP
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::HSP::BlastPullHSP - A parser and HSP object for BlastN hsps
17              
18             =head1 SYNOPSIS
19              
20             # generally we use Bio::SearchIO to build these objects
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23             -file => 'result.blast');
24              
25             while (my $result = $in->next_result) {
26             while (my $hit = $result->next_hit) {
27             print $hit->name, "\n";
28             print $hit->score, "\n";
29             print $hit->significance, "\n";
30              
31             while (my $hsp = $hit->next_hsp) {
32             # process HSPI objects
33             }
34             }
35             }
36              
37             =head1 DESCRIPTION
38              
39             This object implements a parser for BlastN hsp output.
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to
47             the Bioperl mailing list. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             of the bugs and their resolution. Bug reports can be submitted via the
67             web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Sendu Bala
72              
73             Email bix@sendu.me.uk
74              
75             =head1 APPENDIX
76              
77             The rest of the documentation details each of the object methods.
78             Internal methods are usually preceded with a _
79              
80             =cut
81              
82             # Let the code begin...
83              
84             package Bio::Search::HSP::BlastPullHSP;
85              
86 1     1   4 use strict;
  1         2  
  1         25  
87 1     1   4 use base qw(Bio::Search::HSP::PullHSPI);
  1         1  
  1         445  
88              
89             =head2 new
90              
91             Title : new
92             Usage : my $obj = Bio::Search::HSP::BlastNHSP->new();
93             Function: Builds a new Bio::Search::HSP::BlastNHSP object.
94             Returns : Bio::Search::HSP::BlastNHSP
95             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
96             -parent => Bio::PullParserI object (required if no -chunk)
97              
98             where the array ref provided to -chunk contains an IO object
99             for a filehandle to something representing the raw data of the
100             hsp, and $start and $end define the tell() position within the
101             filehandle that the hsp data starts and ends (optional; defaults
102             to start and end of the entire thing described by the filehandle)
103              
104             =cut
105              
106             sub new {
107 145     145 1 331 my ($class, @args) = @_;
108 145         469 my $self = $class->SUPER::new(@args);
109            
110 145         467 $self->_setup(@args);
111            
112 145         284 my $fields = $self->_fields;
113 145         296 foreach my $field (qw( header alignment query_strand hit_strand )) {
114 580         913 $fields->{$field} = undef;
115             }
116            
117 145         1803 $self->_dependencies( { ( score => 'header',
118             bits => 'header',
119             evalue => 'header',
120             total_gaps => 'header',
121             query_strand => 'header',
122             hit_strand => 'header',
123             alignment => 'header',
124             query_string => 'alignment',
125             hit_string => 'alignment',
126             homology_string => 'alignment',
127             query_start => 'alignment',
128             query_end => 'alignment',
129             hit_start => 'alignment',
130             hit_end => 'alignment',
131             hit_identical_inds => 'seq_inds',
132             hit_conserved_inds => 'seq_inds',
133             hit_nomatch_inds => 'seq_inds',
134             hit_gap_inds => 'seq_inds',
135             query_identical_inds => 'seq_inds',
136             query_conserved_inds => 'seq_inds',
137             query_nomatch_inds => 'seq_inds',
138             query_gap_inds => 'seq_inds' ) } );
139            
140 145         477 return $self;
141             }
142              
143             #
144             # PullParserI discovery methods so we can answer all HitI questions
145             #
146              
147             sub _discover_header {
148 136     136   175 my $self = shift;
149 136         336 $self->_chunk_seek(0);
150 136         304 my $header = $self->_get_chunk_by_end("\nQuery");
151 136         300 $self->{_after_header} = $self->_chunk_tell;
152            
153             ($self->_fields->{bits}, $self->_fields->{score}, $self->_fields->{evalue},
154             $self->_fields->{total_gaps}, $self->_fields->{query_strand}, $self->_fields->{hit_strand})
155 136         1585 = $header =~ /^\s*(\S+) bits \((\d+)\),\s+Expect = (\S+)(?:\s+.+Gaps = (\d+))?(?:.+Strand\s*=\s*(\w+)\s*\/\s*(\w+))?/sm;
156            
157 136 100       305 if ($self->_fields->{query_strand}) {
158             # protein blasts don't have strand
159 128         227 for my $strand_type ('query_strand', 'hit_strand') {
160 256 100       361 $self->_fields->{$strand_type} = $self->_fields->{$strand_type} eq 'Plus' ? 1 : -1;
161             }
162             }
163             else {
164 8         15 $self->_fields->{query_strand} = 0;
165 8         15 $self->_fields->{hit_strand} = 0;
166             }
167            
168 136 100       255 if ($self->_fields->{evalue} =~ /^e/) {
169 6         20 $self->_fields->{evalue} = '1'.$self->_fields->{evalue};
170             }
171            
172             # query_gaps isn't always given
173 136 100       226 $self->_fields->{total_gaps} = '[unset]' unless $self->_fields->{total_gaps};
174            
175 136         220 $self->_fields->{header} = 1;
176             }
177              
178             sub _discover_alignment {
179 136     136   171 my $self = shift;
180 136         327 $self->_chunk_seek($self->{_after_header});
181            
182             # work out various basic fields for the hsp
183             # (quicker to do this all at once instead of each method working on
184             # alignment itself)
185 136         241 my ($query_string, $hit_string, $homology_string, $q_start, $h_start, $q_end, $h_end);
186 136   100     241 while (my $strip = $self->_get_chunk_by_end("\nQuery") || $self->_get_chunk_by_nol(4)) {
187 489 50       3271 $strip =~ /\s+(\d+)\s+(\S+)\s+(\d+)\s+(\S.+)\nSbjct:?\s+(\d+)\s+(\S+)\s+(\d+)/gm || last;
188 489         1010 my $q1 = $1;
189 489         863 $query_string .= $2;
190 489         695 my $q2 = $3;
191 489         694 my $hom = $4;
192 489         551 my $h1 = $5;
193 489         639 $hit_string .= $6;
194 489         525 my $h2 = $7;
195            
196 489         1108 $hom = ' 'x(length($6) - length($hom)).$hom;
197 489         660 $homology_string .= $hom;
198            
199 489         731 for my $q ($q1, $q2) {
200 978 100 66     2888 if (! defined $q_start || $q < $q_start) {
201 136         167 $q_start = $q;
202             }
203 978 50 66     2325 if (! defined $q_end || $q > $q_end) {
204 978         1183 $q_end = $q;
205             }
206             }
207 489         638 for my $h ($h1, $h2) {
208 978 100 100     2202 if (! defined $h_start || $h < $h_start) {
209 222         247 $h_start = $h;
210             }
211 978 100 100     2183 if (! defined $h_end || $h > $h_end) {
212 892         1671 $h_end = $h;
213             }
214             }
215             }
216            
217 136         274 $self->_fields->{query_string} = $query_string;
218 136         265 $self->_fields->{hit_string} = $hit_string;
219 136         301 $self->_fields->{homology_string} = $homology_string;
220            
221 136         262 $self->_fields->{query_start} = $q_start;
222 136         242 $self->_fields->{query_end} = $q_end;
223 136         252 $self->_fields->{hit_start} = $h_start;
224 136         271 $self->_fields->{hit_end} = $h_end;
225            
226 136         352 ($self->{_query_gaps}) = $query_string =~ tr/-//;
227 136         269 ($self->{_hit_gaps}) = $hit_string =~ tr/-//;
228 136         498 ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps};
229            
230 136         257 $self->_fields->{alignment} = 1; # stop this method being called again
231             }
232              
233             # seq_inds related methods, all just need seq_inds field to have been gotten
234             sub _discover_seq_inds {
235 126     126   169 my $self = shift;
236 126         268 my ($seqString, $qseq, $sseq) = ( $self->get_field('homology_string'),
237             $self->get_field('query_string'),
238             $self->get_field('hit_string') );
239            
240             # (code largely lifted from GenericHSP)
241            
242             # Using hashes to avoid saving duplicate residue numbers.
243 126         257 my %identicalList_query = ();
244 126         151 my %identicalList_sbjct = ();
245 126         180 my %conservedList_query = ();
246 126         155 my %conservedList_sbjct = ();
247 126         186 my @gapList_query = ();
248 126         136 my @gapList_sbjct = ();
249 126         145 my %nomatchList_query = ();
250 126         142 my %nomatchList_sbjct = ();
251            
252 126         248 my $resCount_query = $self->get_field('query_end');
253 126         275 my $resCount_sbjct = $self->get_field('hit_end');
254            
255 126         208 my ($mchar, $schar, $qchar);
256 126         288 while ($mchar = chop($seqString) ) {
257 24590         26985 ($qchar, $schar) = (chop($qseq), chop($sseq));
258            
259 24590 50 33     70711 if ($mchar eq '+' || $mchar eq '.' || $mchar eq ':') {
    100 33        
260 0         0 $conservedList_query{ $resCount_query } = 1;
261 0         0 $conservedList_sbjct{ $resCount_sbjct } = 1;
262             }
263             elsif ($mchar eq ' ') {
264 414         546 $nomatchList_query{ $resCount_query } = 1;
265 414         558 $nomatchList_sbjct{ $resCount_sbjct } = 1;
266             }
267             else {
268 24176         32839 $identicalList_query{ $resCount_query } = 1;
269 24176         29344 $identicalList_sbjct{ $resCount_sbjct } = 1;
270             }
271            
272 24590 100       25755 if ($qchar eq '-') {
273 4         9 push(@gapList_query, $resCount_query);
274             }
275             else {
276 24586         21415 $resCount_query -= 1;
277             }
278 24590 100       24548 if ($schar eq '-') {
279 5         17 push(@gapList_sbjct, $resCount_sbjct);
280             }
281             else {
282 24585         32567 $resCount_sbjct -= 1;
283             }
284             }
285            
286 126         465 my $fields = $self->_fields;
287 126         3835 $fields->{hit_identical_inds} = [ sort { $a <=> $b } keys %identicalList_sbjct ];
  167930         147119  
288 126         999 $fields->{hit_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_sbjct ];
  0         0  
289 126         446 $fields->{hit_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_sbjct ];
  917         926  
290 126         268 $fields->{hit_gap_inds} = [ reverse @gapList_sbjct ];
291 126         2705 $fields->{query_identical_inds} = [ sort { $a <=> $b } keys %identicalList_query ];
  167604         145858  
292 126         943 $fields->{query_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_query ];
  0         0  
293 126         331 $fields->{query_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_query ];
  893         919  
294 126         238 $fields->{query_gap_inds} = [ reverse @gapList_query ];
295            
296 126         3956 $fields->{seq_inds} = 1;
297             }
298              
299             =head2 query
300              
301             Title : query
302             Usage : my $query = $hsp->query
303             Function: Returns a SeqFeature representing the query in the HSP
304             Returns : L
305             Args : none
306              
307             =cut
308              
309             sub query {
310 22     22 1 58 my $self = shift;
311 22 100       59 unless ($self->{_created_query}) {
312 5         13 $self->SUPER::query( new Bio::SeqFeature::Similarity
313             ('-primary' => $self->primary_tag,
314             '-start' => $self->get_field('query_start'),
315             '-end' => $self->get_field('query_end'),
316             '-expect' => $self->get_field('evalue'),
317             '-score' => $self->get_field('score'),
318             '-strand' => $self->get_field('query_strand'),
319             '-seq_id' => $self->get_field('query_name'),
320             '-seqlength'=> $self->get_field('query_length'),
321             '-source' => $self->get_field('algorithm'),
322             '-seqdesc' => $self->get_field('query_description'),
323             '-frame' => 0 # not known?
324             ) );
325 5         21 $self->{_created_query} = 1;
326             }
327 22         78 return $self->SUPER::query(@_);
328             }
329              
330             =head2 hit
331              
332             Title : hit
333             Usage : my $hit = $hsp->hit
334             Function: Returns a SeqFeature representing the hit in the HSP
335             Returns : L
336             Args : [optional] new value to set
337              
338             =cut
339              
340             sub hit {
341 31     31 1 71 my $self = shift;
342 31 100       76 unless ($self->{_created_hit}) {
343 4         19 $self->SUPER::hit( new Bio::SeqFeature::Similarity
344             ('-primary' => $self->primary_tag,
345             '-start' => $self->get_field('hit_start'),
346             '-end' => $self->get_field('hit_end'),
347             '-expect' => $self->get_field('evalue'),
348             '-score' => $self->get_field('score'),
349             '-strand' => $self->get_field('hit_strand'),
350             '-seq_id' => $self->get_field('name'),
351             '-seqlength'=> $self->get_field('length'),
352             '-source' => $self->get_field('algorithm'),
353             '-seqdesc' => $self->get_field('description'),
354             '-frame' => 0 # not known?
355             ) );
356 4         11 $self->{_created_hit} = 1;
357             }
358 31         99 return $self->SUPER::hit(@_);
359             }
360              
361             =head2 gaps
362              
363             Title : gaps
364             Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
365             Function : Get the number of gap characters in the query, hit, or total alignment.
366             Returns : Integer, number of gap characters or 0 if none
367             Args : 'query' = num conserved / length of query seq (without gaps)
368             'hit' = num conserved / length of hit seq (without gaps)
369             'total' = num conserved / length of alignment (with gaps)
370             default = 'total'
371              
372             =cut
373              
374             sub gaps {
375 14     14 1 31 my ($self, $type) = @_;
376            
377 14 100       53 $type = lc $type if defined $type;
378 14 50 66     127 $type = 'total' if (! defined $type || $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/);
      66        
379 14 50       38 $type = 'hit' if $type =~ /sbjct|subject/;
380            
381 14 100       34 if ($type eq 'total') {
382 4         18 my $answer = $self->get_field('total_gaps');
383 4 100       21 return $answer unless $answer eq '[unset]';
384             }
385            
386 13         40 $self->get_field('alignment'); # make sure gaps have been calculated
387            
388 13         77 return $self->{'_'.$type.'_gaps'};
389             }
390              
391             =head2 strand
392              
393             Title : strand
394             Usage : $hsp->strand('query')
395             Function: Retrieves the strand for the HSP component requested
396             Returns : +1 or -1 (0 if unknown)
397             Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
398             'query' to retrieve the query strand (default)
399             'list' or 'array' to retrieve both query and hit together
400              
401             =cut
402              
403             sub strand {
404 7     7 1 17 my $self = shift;
405 7         13 my $val = shift;
406 7 50       20 $val = 'query' unless defined $val;
407 7         19 $val =~ s/^\s+//;
408              
409 7 100       32 if ($val =~ /^q/i) {
    50          
    0          
410 2         4 return $self->get_field('query_strand');
411             }
412             elsif ($val =~ /^hi|^s/i) {
413 5         17 return $self->get_field('hit_strand');
414             }
415             elsif ($val =~ /^list|array/i) {
416 0         0 return ($self->get_field('query_strand'), $self->get_field('hit_strand'));
417             }
418             else {
419 0         0 $self->warn("unrecognized component '$val' requested\n");
420             }
421 0         0 return 0;
422             }
423              
424             =head2 start
425              
426             Title : start
427             Usage : $hsp->start('query')
428             Function: Retrieves the start for the HSP component requested
429             Returns : integer, or list of two integers (query start and subject start) in
430             list context
431             Args : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject
432             'query' to retrieve the query start (default)
433              
434             =cut
435              
436             sub start {
437 16     16 1 36 my $self = shift;
438 16         26 my $val = shift;
439 16 50       54 $val = (wantarray ? 'list' : 'query') unless defined $val;
    100          
440 16         59 $val =~ s/^\s+//;
441            
442 16 100       121 if ($val =~ /^q/i) {
    100          
    50          
443 2         4 return $self->get_field('query_start');
444             }
445             elsif ($val =~ /^(hi|s)/i) {
446 5         19 return $self->get_field('hit_start');
447             }
448             elsif ($val =~ /^list|array/i) {
449 9         31 return ($self->get_field('query_start'), $self->get_field('hit_start') );
450             }
451             else {
452 0         0 $self->warn("unrecognized component '$val' requested\n");
453             }
454 0         0 return 0;
455             }
456              
457             =head2 end
458              
459             Title : end
460             Usage : $hsp->end('query')
461             Function: Retrieves the end for the HSP component requested
462             Returns : integer, or list of two integers (query end and subject end) in
463             list context
464             Args : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject
465             'query' to retrieve the query end (default)
466              
467             =cut
468              
469             sub end {
470 16     16 1 28 my $self = shift;
471 16         32 my $val = shift;
472 16 50       52 $val = (wantarray ? 'list' : 'query') unless defined $val;
    100          
473 16         48 $val =~ s/^\s+//;
474            
475 16 100       97 if ($val =~ /^q/i) {
    100          
    50          
476 5         18 return $self->get_field('query_end');
477             }
478             elsif ($val =~ /^(hi|s)/i) {
479 2         3 return $self->get_field('hit_end');
480             }
481             elsif ($val =~ /^list|array/i) {
482 9         25 return ($self->get_field('query_end'), $self->get_field('hit_end'));
483             }
484             else {
485 0           $self->warn("unrecognized end component '$val' requested\n");
486             }
487 0           return 0;
488             }
489              
490             =head2 pvalue
491              
492             Title : pvalue
493             Usage : my $pvalue = $hsp->pvalue();
494             Function: Returns the P-value for this HSP
495             Returns : undef (Hmmpfam reports do not have p-values)
496             Args : none
497              
498             =cut
499              
500       0 1   sub pvalue { }
501              
502             1;