File Coverage

Bio/Search/HSP/HmmpfamHSP.pm
Criterion Covered Total %
statement 109 109 100.0
branch 22 26 84.6
condition 7 15 46.6
subroutine 9 9 100.0
pod 5 5 100.0
total 152 164 92.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::HSP::HmmpfamHSP
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::HSP::HmmpfamHSP - A parser and HSP object for hmmpfam hsps
17              
18             =head1 SYNOPSIS
19              
20             # generally we use Bio::SearchIO to build these objects
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23             -file => 'result.hmmer');
24              
25             while (my $result = $in->next_result) {
26             while (my $hit = $result->next_hit) {
27             print $hit->name, "\n";
28             print $hit->score, "\n";
29             print $hit->significance, "\n";
30              
31             while (my $hsp = $hit->next_hsp) {
32             # process HSPI objects
33             }
34             }
35             }
36              
37             =head1 DESCRIPTION
38              
39             This object implements a parser for hmmpfam hsp output, a program in the HMMER
40             package.
41              
42             =head1 FEEDBACK
43              
44             =head2 Mailing Lists
45              
46             User feedback is an integral part of the evolution of this and other
47             Bioperl modules. Send your comments and suggestions preferably to
48             the Bioperl mailing list. Your participation is much appreciated.
49              
50             bioperl-l@bioperl.org - General discussion
51             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52              
53             =head2 Support
54              
55             Please direct usage questions or support issues to the mailing list:
56              
57             I
58              
59             rather than to the module maintainer directly. Many experienced and
60             reponsive experts will be able look at the problem and quickly
61             address it. Please include a thorough description of the problem
62             with code and data examples if at all possible.
63              
64             =head2 Reporting Bugs
65              
66             Report bugs to the Bioperl bug tracking system to help us keep track
67             of the bugs and their resolution. Bug reports can be submitted via the
68             web:
69              
70             https://github.com/bioperl/bioperl-live/issues
71              
72             =head1 AUTHOR - Sendu Bala
73              
74             Email bix@sendu.me.uk
75              
76             =head1 APPENDIX
77              
78             The rest of the documentation details each of the object methods.
79             Internal methods are usually preceded with a _
80              
81             =cut
82              
83             # Let the code begin...
84              
85             package Bio::Search::HSP::HmmpfamHSP;
86              
87 1     1   3 use strict;
  1         1  
  1         24  
88 1     1   3 use base qw(Bio::Search::HSP::PullHSPI);
  1         1  
  1         454  
89              
90             =head2 new
91              
92             Title : new
93             Usage : my $obj = Bio::Search::HSP::HmmpfamHSP->new();
94             Function: Builds a new Bio::Search::HSP::HmmpfamHSP object.
95             Returns : Bio::Search::HSP::HmmpfamHSP
96             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
97             -parent => Bio::PullParserI object (required if no -chunk)
98             -hsp_data => array ref with [rank query_start query_end hit_start
99             hit_end score evalue]
100              
101             where the array ref provided to -chunk contains an IO object
102             for a filehandle to something representing the raw data of the
103             hsp, and $start and $end define the tell() position within the
104             filehandle that the hsp data starts and ends (optional; defaults
105             to start and end of the entire thing described by the filehandle)
106              
107             =cut
108              
109             sub new {
110 34     34 1 60 my ($class, @args) = @_;
111 34         91 my $self = $class->SUPER::new(@args);
112            
113 34         79 $self->_setup(@args);
114            
115 34         53 my $fields = $self->_fields;
116 34         55 foreach my $field (qw( alignment )) {
117 34         56 $fields->{$field} = undef;
118             }
119            
120 34         49 my $hsp_data = $self->_raw_hsp_data;
121 34 50 33     127 if ($hsp_data && ref($hsp_data) eq 'ARRAY') {
122 34         26 my @hsp_data = @{$hsp_data}; # don't alter the reference
  34         89  
123 34         47 foreach my $field (qw(rank query_start query_end hit_start hit_end score evalue)) {
124 238         288 $fields->{$field} = shift(@hsp_data);
125             }
126             }
127            
128 34         200 $self->_dependencies( { ( query_string => 'alignment',
129             hit_string => 'alignment',
130             homology_string => 'alignment',
131             hit_identical_inds => 'seq_inds',
132             hit_conserved_inds => 'seq_inds',
133             hit_nomatch_inds => 'seq_inds',
134             hit_gap_inds => 'seq_inds',
135             query_identical_inds => 'seq_inds',
136             query_conserved_inds => 'seq_inds',
137             query_nomatch_inds => 'seq_inds',
138             query_gap_inds => 'seq_inds' ) } );
139            
140 34         89 return $self;
141             }
142              
143             #
144             # PullParserI discovery methods so we can answer all HitI questions
145             #
146              
147             sub _discover_alignment {
148 25     25   26 my $self = shift;
149 25         44 my $alignments_hash = $self->get_field('alignments');
150            
151 25         45 my $identifier = $self->get_field('name').'~~~~'.$self->get_field('rank');
152            
153 25         60 while (! defined $alignments_hash->{$identifier}) {
154 24 100       46 last unless $self->parent->parent->_next_alignment;
155             }
156 25         34 my $alignment = $alignments_hash->{$identifier};
157            
158 25 100       39 if ($alignment) {
159             # work out query, hit and homology strings, and some stats
160             # (quicker to do this all at once instead of each method working on
161             # $alignment string itself)
162            
163 23         18 my ($query_string, $hit_string, $homology_string);
164 23         270 while ($alignment =~ /\s+(\S+)\n\s+(\S.+)\n\s+\S+\s+\d+\s+(\S+)\s+\d/gm) {
165 45         71 my $hi = $1;
166 45         43 my $ho = $2;
167 45         55 $query_string .= $3;
168            
169 45         78 $hi =~ s/\*\-\>//;
170 45         70 $ho = ' 'x(length($hi) - length($ho)).$ho;
171 45         62 $hi =~ s/\<\-\*//;
172            
173 45         45 $hit_string .= $hi;
174 45         118 $homology_string .= $ho;
175             }
176            
177 23         42 $self->_fields->{query_string} = $query_string;
178 23         33 $self->_fields->{hit_string} = $hit_string;
179 23         51 $homology_string =~ s/ $//;
180 23         33 $self->_fields->{homology_string} = $homology_string;
181            
182 23         50 ($self->{_query_gaps}) = $query_string =~ tr/-//;
183 23         28 ($self->{_hit_gaps}) = $hit_string =~ tr/.//;
184 23         42 ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps};
185             }
186            
187 25         40 $self->_fields->{alignment} = 1; # stop this method being called again
188             }
189              
190             # seq_inds related methods, all just need seq_inds field to have been gotten
191             sub _discover_seq_inds {
192 22     22   18 my $self = shift;
193 22         27 my ($seqString, $qseq, $sseq) = ( $self->get_field('homology_string'),
194             $self->get_field('query_string'),
195             $self->get_field('hit_string') );
196            
197             # (code largely lifted from GenericHSP)
198            
199             # Using hashes to avoid saving duplicate residue numbers.
200 22         31 my %identicalList_query = ();
201 22         20 my %identicalList_sbjct = ();
202 22         23 my %conservedList_query = ();
203 22         17 my %conservedList_sbjct = ();
204 22         18 my @gapList_query = ();
205 22         22 my @gapList_sbjct = ();
206 22         19 my %nomatchList_query = ();
207 22         19 my %nomatchList_sbjct = ();
208            
209 22         29 my $resCount_query = $self->get_field('query_end');
210 22         27 my $resCount_sbjct = $self->get_field('hit_end');
211            
212 22         26 my ($mchar, $schar, $qchar);
213 22         41 while ($mchar = chop($seqString) ) {
214 1705         1286 ($qchar, $schar) = (chop($qseq), chop($sseq));
215            
216 1705 100 66     4626 if ($mchar eq '+' || $mchar eq '.' || $mchar eq ':') {
    100 66        
217 616         476 $conservedList_query{ $resCount_query } = 1;
218 616         478 $conservedList_sbjct{ $resCount_sbjct } = 1;
219             }
220             elsif ($mchar eq ' ') {
221 484         400 $nomatchList_query{ $resCount_query } = 1;
222 484         408 $nomatchList_sbjct{ $resCount_sbjct } = 1;
223             }
224             else {
225 605         464 $identicalList_query{ $resCount_query } = 1;
226 605         498 $identicalList_sbjct{ $resCount_sbjct } = 1;
227             }
228            
229 1705 100       1480 if ($qchar eq '-') {
230 143         120 push(@gapList_query, $resCount_query);
231             }
232             else {
233 1562         838 $resCount_query -= 1;
234             }
235 1705 100       1386 if ($schar eq '.') {
236 11         22 push(@gapList_sbjct, $resCount_sbjct);
237             }
238             else {
239 1694         1979 $resCount_sbjct -= 1;
240             }
241             }
242            
243 22         48 my $fields = $self->_fields;
244 22         159 $fields->{hit_identical_inds} = [ sort { $a <=> $b } keys %identicalList_sbjct ];
  2138         1343  
245 22         95 $fields->{hit_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_sbjct ];
  2253         1348  
246 22         81 $fields->{hit_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_sbjct ];
  1606         1000  
247 22         42 $fields->{hit_gap_inds} = [ reverse @gapList_sbjct ];
248 22         77 $fields->{query_identical_inds} = [ sort { $a <=> $b } keys %identicalList_query ];
  2243         1330  
249 22         86 $fields->{query_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_query ];
  2236         1308  
250 22         64 $fields->{query_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_query ];
  1117         676  
251 22         56 $fields->{query_gap_inds} = [ reverse @gapList_query ];
252            
253 22         229 $fields->{seq_inds} = 1;
254             }
255              
256             =head2 query
257              
258             Title : query
259             Usage : my $query = $hsp->query
260             Function: Returns a SeqFeature representing the query in the HSP
261             Returns : L
262             Args : none
263              
264             =cut
265              
266             sub query {
267 50     50 1 4022 my $self = shift;
268 50 100       96 unless ($self->{_created_query}) {
269 4         16 $self->SUPER::query( new Bio::SeqFeature::Similarity
270             ('-primary' => $self->primary_tag,
271             '-start' => $self->get_field('query_start'),
272             '-end' => $self->get_field('query_end'),
273             '-expect' => $self->get_field('evalue'),
274             '-score' => $self->get_field('score'),
275             '-strand' => 1,
276             '-seq_id' => $self->get_field('query_name'),
277             #'-seqlength'=> $self->get_field('query_length'), (not known)
278             '-source' => $self->get_field('algorithm'),
279             '-seqdesc' => $self->get_field('query_description')
280             ) );
281 4         14 $self->{_created_query} = 1;
282             }
283 50         105 return $self->SUPER::query(@_);
284             }
285              
286             =head2 hit
287              
288             Title : hit
289             Usage : my $hit = $hsp->hit
290             Function: Returns a SeqFeature representing the hit in the HSP
291             Returns : L
292             Args : [optional] new value to set
293              
294             =cut
295              
296             sub hit {
297 60     60 1 4155 my $self = shift;
298 60 100       127 unless ($self->{_created_hit}) {
299             # the full length isn't always known (given in the report), but don't
300             # warn about the missing info all the time
301 6         17 my $verbose = $self->parent->parent->parent->verbose;
302 6         14 $self->parent->parent->parent->verbose(-1);
303 6         13 my $seq_length = $self->get_field('length');
304 6         14 $self->parent->parent->parent->verbose($verbose);
305            
306 6 100       20 $self->SUPER::hit( new Bio::SeqFeature::Similarity
307             ('-primary' => $self->primary_tag,
308             '-start' => $self->get_field('hit_start'),
309             '-end' => $self->get_field('hit_end'),
310             '-expect' => $self->get_field('evalue'),
311             '-score' => $self->get_field('score'),
312             '-strand' => 1,
313             '-seq_id' => $self->get_field('name'),
314             $seq_length ? ('-seqlength' => $seq_length) : (),
315             '-source' => $self->get_field('algorithm'),
316             '-seqdesc' => $self->get_field('description')
317             ) );
318 6         16 $self->{_created_hit} = 1;
319             }
320 60         145 return $self->SUPER::hit(@_);
321             }
322              
323             =head2 gaps
324              
325             Title : gaps
326             Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
327             Function : Get the number of gaps in the query, hit, or total alignment.
328             Returns : Integer, number of gaps or 0 if none
329             Args : 'query' = num conserved / length of query seq (without gaps)
330             'hit' = num conserved / length of hit seq (without gaps)
331             'total' = num conserved / length of alignment (with gaps)
332             default = 'total'
333              
334             =cut
335              
336             sub gaps {
337 14     14 1 2565 my ($self, $type) = @_;
338            
339 14 50       34 $type = lc $type if defined $type;
340 14 50 33     115 $type = 'total' if (! defined $type || $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/);
      33        
341 14 50       20 $type = 'hit' if $type =~ /sbjct|subject/;
342            
343 14         28 $self->get_field('alignment'); # make sure gaps have been calculated
344            
345 14         47 return $self->{'_'.$type.'_gaps'};
346             }
347              
348             =head2 pvalue
349              
350             Title : pvalue
351             Usage : my $pvalue = $hsp->pvalue();
352             Function: Returns the P-value for this HSP
353             Returns : undef (Hmmpfam reports do not have p-values)
354             Args : none
355              
356             =cut
357              
358             # noop
359       4 1   sub pvalue { }
360              
361             1;