File Coverage

Bio/Search/HSP/BlastPullHSP.pm
Criterion Covered Total %
statement 145 156 92.9
branch 55 68 80.8
condition 19 27 70.3
subroutine 12 13 92.3
pod 8 8 100.0
total 239 272 87.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::HSP::BlastPullHSP
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::HSP::BlastPullHSP - A parser and HSP object for BlastN hsps
17              
18             =head1 SYNOPSIS
19              
20             # generally we use Bio::SearchIO to build these objects
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23             -file => 'result.blast');
24              
25             while (my $result = $in->next_result) {
26             while (my $hit = $result->next_hit) {
27             print $hit->name, "\n";
28             print $hit->score, "\n";
29             print $hit->significance, "\n";
30              
31             while (my $hsp = $hit->next_hsp) {
32             # process HSPI objects
33             }
34             }
35             }
36              
37             =head1 DESCRIPTION
38              
39             This object implements a parser for BlastN hsp output.
40              
41             =head1 FEEDBACK
42              
43             =head2 Mailing Lists
44              
45             User feedback is an integral part of the evolution of this and other
46             Bioperl modules. Send your comments and suggestions preferably to
47             the Bioperl mailing list. Your participation is much appreciated.
48              
49             bioperl-l@bioperl.org - General discussion
50             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
51              
52             =head2 Support
53              
54             Please direct usage questions or support issues to the mailing list:
55              
56             I
57              
58             rather than to the module maintainer directly. Many experienced and
59             reponsive experts will be able look at the problem and quickly
60             address it. Please include a thorough description of the problem
61             with code and data examples if at all possible.
62              
63             =head2 Reporting Bugs
64              
65             Report bugs to the Bioperl bug tracking system to help us keep track
66             of the bugs and their resolution. Bug reports can be submitted via the
67             web:
68              
69             https://github.com/bioperl/bioperl-live/issues
70              
71             =head1 AUTHOR - Sendu Bala
72              
73             Email bix@sendu.me.uk
74              
75             =head1 APPENDIX
76              
77             The rest of the documentation details each of the object methods.
78             Internal methods are usually preceded with a _
79              
80             =cut
81              
82             # Let the code begin...
83              
84             package Bio::Search::HSP::BlastPullHSP;
85              
86 1     1   4 use strict;
  1         0  
  1         23  
87 1     1   3 use base qw(Bio::Search::HSP::PullHSPI);
  1         1  
  1         495  
88              
89             =head2 new
90              
91             Title : new
92             Usage : my $obj = Bio::Search::HSP::BlastNHSP->new();
93             Function: Builds a new Bio::Search::HSP::BlastNHSP object.
94             Returns : Bio::Search::HSP::BlastNHSP
95             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
96             -parent => Bio::PullParserI object (required if no -chunk)
97              
98             where the array ref provided to -chunk contains an IO object
99             for a filehandle to something representing the raw data of the
100             hsp, and $start and $end define the tell() position within the
101             filehandle that the hsp data starts and ends (optional; defaults
102             to start and end of the entire thing described by the filehandle)
103              
104             =cut
105              
106             sub new {
107 145     145 1 237 my ($class, @args) = @_;
108 145         400 my $self = $class->SUPER::new(@args);
109            
110 145         329 $self->_setup(@args);
111            
112 145         219 my $fields = $self->_fields;
113 145         202 foreach my $field (qw( header alignment query_strand hit_strand )) {
114 580         672 $fields->{$field} = undef;
115             }
116            
117 145         1310 $self->_dependencies( { ( score => 'header',
118             bits => 'header',
119             evalue => 'header',
120             total_gaps => 'header',
121             query_strand => 'header',
122             hit_strand => 'header',
123             alignment => 'header',
124             query_string => 'alignment',
125             hit_string => 'alignment',
126             homology_string => 'alignment',
127             query_start => 'alignment',
128             query_end => 'alignment',
129             hit_start => 'alignment',
130             hit_end => 'alignment',
131             hit_identical_inds => 'seq_inds',
132             hit_conserved_inds => 'seq_inds',
133             hit_nomatch_inds => 'seq_inds',
134             hit_gap_inds => 'seq_inds',
135             query_identical_inds => 'seq_inds',
136             query_conserved_inds => 'seq_inds',
137             query_nomatch_inds => 'seq_inds',
138             query_gap_inds => 'seq_inds' ) } );
139            
140 145         336 return $self;
141             }
142              
143             #
144             # PullParserI discovery methods so we can answer all HitI questions
145             #
146              
147             sub _discover_header {
148 136     136   114 my $self = shift;
149 136         230 $self->_chunk_seek(0);
150 136         219 my $header = $self->_get_chunk_by_end("\nQuery");
151 136         218 $self->{_after_header} = $self->_chunk_tell;
152            
153             ($self->_fields->{bits}, $self->_fields->{score}, $self->_fields->{evalue},
154             $self->_fields->{total_gaps}, $self->_fields->{query_strand}, $self->_fields->{hit_strand})
155 136         1292 = $header =~ /^\s*(\S+) bits \((\d+)\),\s+Expect = (\S+)(?:\s+.+Gaps = (\d+))?(?:.+Strand\s*=\s*(\w+)\s*\/\s*(\w+))?/sm;
156            
157 136 100       333 if ($self->_fields->{query_strand}) {
158             # protein blasts don't have strand
159 128         170 for my $strand_type ('query_strand', 'hit_strand') {
160 256 100       302 $self->_fields->{$strand_type} = $self->_fields->{$strand_type} eq 'Plus' ? 1 : -1;
161             }
162             }
163             else {
164 8         18 $self->_fields->{query_strand} = 0;
165 8         14 $self->_fields->{hit_strand} = 0;
166             }
167            
168 136 100       194 if ($self->_fields->{evalue} =~ /^e/) {
169 6         16 $self->_fields->{evalue} = '1'.$self->_fields->{evalue};
170             }
171            
172             # query_gaps isn't always given
173 136 100       228 $self->_fields->{total_gaps} = '[unset]' unless $self->_fields->{total_gaps};
174            
175 136         205 $self->_fields->{header} = 1;
176             }
177              
178             sub _discover_alignment {
179 136     136   134 my $self = shift;
180 136         256 $self->_chunk_seek($self->{_after_header});
181            
182             # work out various basic fields for the hsp
183             # (quicker to do this all at once instead of each method working on
184             # alignment itself)
185 136         114 my ($query_string, $hit_string, $homology_string, $q_start, $h_start, $q_end, $h_end);
186 136   100     231 while (my $strip = $self->_get_chunk_by_end("\nQuery") || $self->_get_chunk_by_nol(4)) {
187 489 50       2749 $strip =~ /\s+(\d+)\s+(\S+)\s+(\d+)\s+(\S.+)\nSbjct:?\s+(\d+)\s+(\S+)\s+(\d+)/gm || last;
188 489         623 my $q1 = $1;
189 489         619 $query_string .= $2;
190 489         458 my $q2 = $3;
191 489         547 my $hom = $4;
192 489         403 my $h1 = $5;
193 489         436 $hit_string .= $6;
194 489         504 my $h2 = $7;
195            
196 489         784 $hom = ' 'x(length($6) - length($hom)).$hom;
197 489         529 $homology_string .= $hom;
198            
199 489         578 for my $q ($q1, $q2) {
200 978 100 66     2664 if (! defined $q_start || $q < $q_start) {
201 136         144 $q_start = $q;
202             }
203 978 50 66     2274 if (! defined $q_end || $q > $q_end) {
204 978         921 $q_end = $q;
205             }
206             }
207 489         448 for my $h ($h1, $h2) {
208 978 100 100     2287 if (! defined $h_start || $h < $h_start) {
209 222         170 $h_start = $h;
210             }
211 978 100 100     2297 if (! defined $h_end || $h > $h_end) {
212 892         1549 $h_end = $h;
213             }
214             }
215             }
216            
217 136         252 $self->_fields->{query_string} = $query_string;
218 136         190 $self->_fields->{hit_string} = $hit_string;
219 136         177 $self->_fields->{homology_string} = $homology_string;
220            
221 136         227 $self->_fields->{query_start} = $q_start;
222 136         216 $self->_fields->{query_end} = $q_end;
223 136         194 $self->_fields->{hit_start} = $h_start;
224 136         178 $self->_fields->{hit_end} = $h_end;
225            
226 136         241 ($self->{_query_gaps}) = $query_string =~ tr/-//;
227 136         206 ($self->{_hit_gaps}) = $hit_string =~ tr/-//;
228 136         370 ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps};
229            
230 136         183 $self->_fields->{alignment} = 1; # stop this method being called again
231             }
232              
233             # seq_inds related methods, all just need seq_inds field to have been gotten
234             sub _discover_seq_inds {
235 126     126   99 my $self = shift;
236 126         209 my ($seqString, $qseq, $sseq) = ( $self->get_field('homology_string'),
237             $self->get_field('query_string'),
238             $self->get_field('hit_string') );
239            
240             # (code largely lifted from GenericHSP)
241            
242             # Using hashes to avoid saving duplicate residue numbers.
243 126         206 my %identicalList_query = ();
244 126         139 my %identicalList_sbjct = ();
245 126         132 my %conservedList_query = ();
246 126         122 my %conservedList_sbjct = ();
247 126         129 my @gapList_query = ();
248 126         125 my @gapList_sbjct = ();
249 126         97 my %nomatchList_query = ();
250 126         103 my %nomatchList_sbjct = ();
251            
252 126         205 my $resCount_query = $self->get_field('query_end');
253 126         198 my $resCount_sbjct = $self->get_field('hit_end');
254            
255 126         105 my ($mchar, $schar, $qchar);
256 126         254 while ($mchar = chop($seqString) ) {
257 24590         18093 ($qchar, $schar) = (chop($qseq), chop($sseq));
258            
259 24590 50 33     85454 if ($mchar eq '+' || $mchar eq '.' || $mchar eq ':') {
    100 33        
260 0         0 $conservedList_query{ $resCount_query } = 1;
261 0         0 $conservedList_sbjct{ $resCount_sbjct } = 1;
262             }
263             elsif ($mchar eq ' ') {
264 414         377 $nomatchList_query{ $resCount_query } = 1;
265 414         430 $nomatchList_sbjct{ $resCount_sbjct } = 1;
266             }
267             else {
268 24176         21084 $identicalList_query{ $resCount_query } = 1;
269 24176         21124 $identicalList_sbjct{ $resCount_sbjct } = 1;
270             }
271            
272 24590 100       20213 if ($qchar eq '-') {
273 4         6 push(@gapList_query, $resCount_query);
274             }
275             else {
276 24586         14303 $resCount_query -= 1;
277             }
278 24590 100       20089 if ($schar eq '-') {
279 5         13 push(@gapList_sbjct, $resCount_sbjct);
280             }
281             else {
282 24585         28854 $resCount_sbjct -= 1;
283             }
284             }
285            
286 126         338 my $fields = $self->_fields;
287 126         3074 $fields->{hit_identical_inds} = [ sort { $a <=> $b } keys %identicalList_sbjct ];
  167651         95297  
288 126         757 $fields->{hit_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_sbjct ];
  0         0  
289 126         238 $fields->{hit_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_sbjct ];
  917         623  
290 126         199 $fields->{hit_gap_inds} = [ reverse @gapList_sbjct ];
291 126         2089 $fields->{query_identical_inds} = [ sort { $a <=> $b } keys %identicalList_query ];
  167847         95234  
292 126         722 $fields->{query_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_query ];
  0         0  
293 126         207 $fields->{query_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_query ];
  920         636  
294 126         193 $fields->{query_gap_inds} = [ reverse @gapList_query ];
295            
296 126         3279 $fields->{seq_inds} = 1;
297             }
298              
299             =head2 query
300              
301             Title : query
302             Usage : my $query = $hsp->query
303             Function: Returns a SeqFeature representing the query in the HSP
304             Returns : L
305             Args : none
306              
307             =cut
308              
309             sub query {
310 22     22 1 43 my $self = shift;
311 22 100       45 unless ($self->{_created_query}) {
312 5         19 $self->SUPER::query( new Bio::SeqFeature::Similarity
313             ('-primary' => $self->primary_tag,
314             '-start' => $self->get_field('query_start'),
315             '-end' => $self->get_field('query_end'),
316             '-expect' => $self->get_field('evalue'),
317             '-score' => $self->get_field('score'),
318             '-strand' => $self->get_field('query_strand'),
319             '-seq_id' => $self->get_field('query_name'),
320             '-seqlength'=> $self->get_field('query_length'),
321             '-source' => $self->get_field('algorithm'),
322             '-seqdesc' => $self->get_field('query_description'),
323             '-frame' => 0 # not known?
324             ) );
325 5         23 $self->{_created_query} = 1;
326             }
327 22         52 return $self->SUPER::query(@_);
328             }
329              
330             =head2 hit
331              
332             Title : hit
333             Usage : my $hit = $hsp->hit
334             Function: Returns a SeqFeature representing the hit in the HSP
335             Returns : L
336             Args : [optional] new value to set
337              
338             =cut
339              
340             sub hit {
341 31     31 1 40 my $self = shift;
342 31 100       59 unless ($self->{_created_hit}) {
343 4         14 $self->SUPER::hit( new Bio::SeqFeature::Similarity
344             ('-primary' => $self->primary_tag,
345             '-start' => $self->get_field('hit_start'),
346             '-end' => $self->get_field('hit_end'),
347             '-expect' => $self->get_field('evalue'),
348             '-score' => $self->get_field('score'),
349             '-strand' => $self->get_field('hit_strand'),
350             '-seq_id' => $self->get_field('name'),
351             '-seqlength'=> $self->get_field('length'),
352             '-source' => $self->get_field('algorithm'),
353             '-seqdesc' => $self->get_field('description'),
354             '-frame' => 0 # not known?
355             ) );
356 4         15 $self->{_created_hit} = 1;
357             }
358 31         71 return $self->SUPER::hit(@_);
359             }
360              
361             =head2 gaps
362              
363             Title : gaps
364             Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
365             Function : Get the number of gap characters in the query, hit, or total alignment.
366             Returns : Integer, number of gap characters or 0 if none
367             Args : 'query' = num conserved / length of query seq (without gaps)
368             'hit' = num conserved / length of hit seq (without gaps)
369             'total' = num conserved / length of alignment (with gaps)
370             default = 'total'
371              
372             =cut
373              
374             sub gaps {
375 14     14 1 18 my ($self, $type) = @_;
376            
377 14 100       32 $type = lc $type if defined $type;
378 14 50 66     118 $type = 'total' if (! defined $type || $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/);
      66        
379 14 50       28 $type = 'hit' if $type =~ /sbjct|subject/;
380            
381 14 100       26 if ($type eq 'total') {
382 4         12 my $answer = $self->get_field('total_gaps');
383 4 100       14 return $answer unless $answer eq '[unset]';
384             }
385            
386 13         29 $self->get_field('alignment'); # make sure gaps have been calculated
387            
388 13         59 return $self->{'_'.$type.'_gaps'};
389             }
390              
391             =head2 strand
392              
393             Title : strand
394             Usage : $hsp->strand('query')
395             Function: Retrieves the strand for the HSP component requested
396             Returns : +1 or -1 (0 if unknown)
397             Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject
398             'query' to retrieve the query strand (default)
399             'list' or 'array' to retreive both query and hit together
400              
401             =cut
402              
403             sub strand {
404 7     7 1 10 my $self = shift;
405 7         7 my $val = shift;
406 7 50       16 $val = 'query' unless defined $val;
407 7         15 $val =~ s/^\s+//;
408              
409 7 100       28 if ($val =~ /^q/i) {
    50          
    0          
410 2         4 return $self->get_field('query_strand');
411             }
412             elsif ($val =~ /^hi|^s/i) {
413 5         15 return $self->get_field('hit_strand');
414             }
415             elsif ($val =~ /^list|array/i) {
416 0         0 return ($self->get_field('query_strand'), $self->get_field('hit_strand'));
417             }
418             else {
419 0         0 $self->warn("unrecognized component '$val' requested\n");
420             }
421 0         0 return 0;
422             }
423              
424             =head2 start
425              
426             Title : start
427             Usage : $hsp->start('query')
428             Function: Retrieves the start for the HSP component requested
429             Returns : integer, or list of two integers (query start and subject start) in
430             list context
431             Args : 'hit' or 'subject' or 'sbjct' to retrieve the start of the subject
432             'query' to retrieve the query start (default)
433              
434             =cut
435              
436             sub start {
437 16     16 1 18 my $self = shift;
438 16         16 my $val = shift;
439 16 50       39 $val = (wantarray ? 'list' : 'query') unless defined $val;
    100          
440 16         34 $val =~ s/^\s+//;
441            
442 16 100       70 if ($val =~ /^q/i) {
    100          
    50          
443 2         5 return $self->get_field('query_start');
444             }
445             elsif ($val =~ /^(hi|s)/i) {
446 5         15 return $self->get_field('hit_start');
447             }
448             elsif ($val =~ /^list|array/i) {
449 9         15 return ($self->get_field('query_start'), $self->get_field('hit_start') );
450             }
451             else {
452 0         0 $self->warn("unrecognized component '$val' requested\n");
453             }
454 0         0 return 0;
455             }
456              
457             =head2 end
458              
459             Title : end
460             Usage : $hsp->end('query')
461             Function: Retrieves the end for the HSP component requested
462             Returns : integer, or list of two integers (query end and subject end) in
463             list context
464             Args : 'hit' or 'subject' or 'sbjct' to retrieve the end of the subject
465             'query' to retrieve the query end (default)
466              
467             =cut
468              
469             sub end {
470 16     16 1 21 my $self = shift;
471 16         17 my $val = shift;
472 16 50       40 $val = (wantarray ? 'list' : 'query') unless defined $val;
    100          
473 16         34 $val =~ s/^\s+//;
474            
475 16 100       61 if ($val =~ /^q/i) {
    100          
    50          
476 5         15 return $self->get_field('query_end');
477             }
478             elsif ($val =~ /^(hi|s)/i) {
479 2         5 return $self->get_field('hit_end');
480             }
481             elsif ($val =~ /^list|array/i) {
482 9         17 return ($self->get_field('query_end'), $self->get_field('hit_end'));
483             }
484             else {
485 0           $self->warn("unrecognized end component '$val' requested\n");
486             }
487 0           return 0;
488             }
489              
490             =head2 pvalue
491              
492             Title : pvalue
493             Usage : my $pvalue = $hsp->pvalue();
494             Function: Returns the P-value for this HSP
495             Returns : undef (Hmmpfam reports do not have p-values)
496             Args : none
497              
498             =cut
499              
500       0 1   sub pvalue { }
501              
502             1;