File Coverage

Bio/Search/Hit/GenericHit.pm
Criterion Covered Total %
statement 331 462 71.6
branch 192 328 58.5
condition 60 106 56.6
subroutine 41 49 83.6
pod 46 46 100.0
total 670 991 67.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::Hit::GenericHit
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::Hit::GenericHit - A generic implementation of the Bio::Search::Hit::HitI interface
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Search::Hit::GenericHit;
21             my $hit = Bio::Search::Hit::GenericHit->new(-algorithm => 'blastp');
22              
23             # typically one gets HitI objects from a SearchIO stream via a ResultI
24             use Bio::SearchIO;
25             my $parser = Bio::SearchIO->new(-format => 'blast', -file => 'result.bls');
26              
27             my $result = $parser->next_result;
28             my $hit = $result->next_hit;
29              
30             # TODO: Describe how to configure a SearchIO stream so that it generates
31             # GenericHit objects.
32              
33             =head1 DESCRIPTION
34              
35             This object handles the hit data from a Database Sequence Search such
36             as FASTA or BLAST.
37              
38             Unless you're writing a parser, you won't ever need to create a
39             GenericHit or any other HitI-implementing object. If you use
40             the SearchIO system, HitI objects are created automatically from
41             a SearchIO stream which returns Bio::Search::Hit::HitI objects.
42              
43             For documentation on what you can do with GenericHit (and other HitI
44             objects), please see the API documentation in
45             L.
46              
47             =head1 FEEDBACK
48              
49             =head2 Mailing Lists
50              
51             User feedback is an integral part of the evolution of this and other
52             Bioperl modules. Send your comments and suggestions preferably to
53             the Bioperl mailing list. Your participation is much appreciated.
54              
55             bioperl-l@bioperl.org - General discussion
56             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
57              
58             =head2 Support
59              
60             Please direct usage questions or support issues to the mailing list:
61              
62             I
63              
64             rather than to the module maintainer directly. Many experienced and
65             reponsive experts will be able look at the problem and quickly
66             address it. Please include a thorough description of the problem
67             with code and data examples if at all possible.
68              
69             =head2 Reporting Bugs
70              
71             Report bugs to the Bioperl bug tracking system to help us keep track
72             of the bugs and their resolution. Bug reports can be submitted via the
73             web:
74              
75             https://github.com/bioperl/bioperl-live/issues
76              
77             =head1 AUTHOR - Jason Stajich and Steve Chervitz
78              
79             Email jason-at-bioperl-dot-org
80             Email sac-at-bioperl-dot-org
81              
82             =head1 CONTRIBUTORS
83              
84             Sendu Bala, bix@sendu.me.uk
85              
86             =head1 APPENDIX
87              
88             The rest of the documentation details each of the object methods.
89             Internal methods are usually preceded with a _
90              
91             =cut
92              
93              
94             # Let the code begin...
95              
96              
97             package Bio::Search::Hit::GenericHit;
98 27     27   200 use strict;
  27         47  
  27         815  
99              
100 27     27   9829 use Bio::Search::SearchUtils;
  27         89  
  27         911  
101              
102 27     27   1125 use base qw(Bio::Root::Root Bio::Search::Hit::HitI);
  27         45  
  27         12889  
103              
104             =head2 new
105              
106             Title : new
107             Usage : my $obj = Bio::Search::Hit::GenericHit->new();
108             Function: Builds a new Bio::Search::Hit::GenericHit object
109             Returns : Bio::Search::Hit::GenericHit
110             Args : -name => Name of Hit (required)
111             -description => Description (optional)
112             -accession => Accession number (optional)
113             -ncbi_gi => NCBI GI UID (optional)
114             -length => Length of the Hit (optional)
115             -score => Raw Score for the Hit (optional)
116             -bits => Bit Score for the Hit (optional)
117             -significance => Significance value for the Hit (optional)
118             -algorithm => Algorithm used (BLASTP, FASTX, etc...)
119             -hsps => Array ref of HSPs for this Hit.
120             -found_again => boolean, true if hit appears in a
121             "previously found" section of a PSI-Blast report.
122             -hsp_factory => Bio::Factory::ObjectFactoryI able to create HSPI
123             objects.
124              
125             =cut
126              
127             sub new {
128 3595     3595 1 10100 my($class,@args) = @_;
129              
130 3595         8219 my $self = $class->SUPER::new(@args);
131 3595         15222 my ($hsps, $name,$query_len,$desc, $acc, $locus, $length,
132             $score,$algo,$signif,$bits, $p,
133             $rank, $hsp_factory, $gi, $iter, $found) = $self->_rearrange([qw(HSPS
134             NAME
135             QUERY_LEN
136             DESCRIPTION
137             ACCESSION
138             LOCUS
139             LENGTH SCORE ALGORITHM
140             SIGNIFICANCE BITS P
141             RANK
142             HSP_FACTORY
143             NCBI_GI
144             ITERATION
145             FOUND_AGAIN)], @args);
146            
147 3595 100       14622 defined $query_len && $self->query_length($query_len);
148              
149 3595 50       5330 if( ! defined $name ) {
150 0         0 $self->throw("Must have defined a valid name for Hit");
151             } else {
152 3595         5664 $self->name($name);
153             }
154              
155 3595 100       6693 defined $acc && $self->accession($acc);
156 3595 50       5088 defined $locus && $self->locus($locus);
157 3595 100       8075 defined $desc && $self->description($desc);
158 3595 100       5969 defined $length && $self->length($length);
159 3595 100       7829 defined $algo && $self->algorithm($algo);
160 3595 100       7561 defined $signif && $self->significance($signif);
161 3595 100       5595 defined $score && $self->raw_score($score);
162 3595 100       6164 defined $bits && $self->bits($bits);
163 3595 100       8180 defined $rank && $self->rank($rank);
164 3595 100       7982 defined $hsp_factory && $self->hsp_factory($hsp_factory);
165 3595 100       4702 defined $gi && $self->ncbi_gi($gi);
166 3595 50       4496 defined $iter && $self->iteration($iter);
167 3595 50       4832 defined $found && $self->found_again($found);
168             # p() has a weird interface, so this is a hack workaround
169 3595 100       4647 if (defined $p) {
170 1         2 $self->{_p} = $p;
171             }
172              
173 3595         4203 $self->{'_iterator'} = 0;
174 3595 100       4563 if( defined $hsps ) {
175 3534 50       11704 if( ref($hsps) !~ /array/i ) {
176 0         0 $self->warn("Did not specify a valid array ref for the param HSPS ($hsps)");
177             } else {
178 3534         4010 my $hspcount=0;
179 3534         3587 while( @{$hsps} ) {
  6367         10004  
180 2833         2968 $hspcount++;
181 2833         2521 $self->add_hsp(shift @{$hsps} );
  2833         4633  
182             }
183 3534 100       6311 $self->{'_hsps'} = undef if $hspcount == 0;
184             }
185             }
186             else {
187 61         139 $self->{'_hsps'} = undef;
188             }
189              
190 3595         12316 return $self;
191             }
192              
193             =head2 add_hsp
194              
195             Title : add_hsp
196             Usage : $hit->add_hsp($hsp)
197             Function: Add a HSP to the collection of HSPs for a Hit
198             Returns : number of HSPs in the Hit
199             Args : Bio::Search::HSP::HSPI object, OR hash ref containing data suitable
200             for creating a HSPI object (&hsp_factory must be set to get it back)
201              
202             =cut
203              
204             sub add_hsp {
205 3435     3435 1 4945 my ($self,$hsp) = @_;
206 3435 50 66     12881 if (!defined $hsp || (ref($hsp) ne 'HASH' && !$hsp->isa('Bio::Search::HSP::HSPI'))) {
      33        
207 0         0 $self->throw("Must provide a valid Bio::Search::HSP::HSPI object or hash ref to object: $self method: add_hsp value: $hsp");
208 0         0 return;
209             }
210            
211 3435         3706 push @{$self->{'_hsps'}}, $hsp;
  3435         6861  
212 3435 100       5815 if (ref($hsp) eq 'HASH') {
213 2832         3904 $self->{_hashes}->{$#{$self->{'_hsps'}}} = 1;
  2832         5698  
214             }
215 3435         4520 return scalar @{$self->{'_hsps'}};
  3435         5406  
216             }
217              
218             =head2 hsp_factory
219              
220             Title : hsp_factory
221             Usage : $hit->hsp_factory($hsp_factory)
222             Function: Get/set the factory used to build HSPI objects if necessary.
223             Returns : Bio::Factory::ObjectFactoryI
224             Args : Bio::Factory::ObjectFactoryI
225              
226             =cut
227              
228             sub hsp_factory {
229 4261     4261 1 4209 my $self = shift;
230 4261 100       5741 if (@_) { $self->{_hsp_factory} = shift }
  3533         4109  
231 4261   50     8300 return $self->{_hsp_factory} || return;
232             }
233              
234             =head2 Bio::Search::Hit::HitI methods
235              
236             Implementation of Bio::Search::Hit::HitI methods
237              
238             =head2 name
239              
240             Title : name
241             Usage : $hit_name = $hit->name();
242             Function: returns the name of the Hit sequence
243             Returns : a scalar string
244             Args : [optional] scalar string to set the name
245              
246             =cut
247              
248             sub name {
249 5117     5117 1 19167 my ($self,$value) = @_;
250 5117         6344 my $previous = $self->{'_name'};
251 5117 100 66     10367 if( defined $value || ! defined $previous ) {
252 3656 50       4703 $value = $previous = '' unless defined $value;
253 3656         4525 $self->{'_name'} = $value;
254             }
255 5117         6798 return $previous;
256             }
257              
258             =head2 accession
259              
260             Title : accession
261             Usage : $acc = $hit->accession();
262             Function: Retrieve the accession (if available) for the hit
263             Returns : a scalar string (empty string if not set)
264             Args : none
265              
266             =cut
267              
268             sub accession {
269 2952     2952 1 6652 my ($self,$value) = @_;
270 2952         3434 my $previous = $self->{'_accession'};
271 2952 100 66     5286 if( defined $value || ! defined $previous ) {
272 2377 50       3308 $value = $previous = '' unless defined $value;
273 2377         3375 $self->{'_accession'} = $value;
274             }
275 2952         3600 return $previous;
276             }
277              
278             =head2 description
279              
280             Title : description
281             Usage : $desc = $hit->description();
282             Function: Retrieve the description for the hit
283             Returns : a scalar string
284             Args : [optional] scalar string to set the description
285              
286             =cut
287              
288             sub description {
289 3669     3669 1 4365 my ($self,$value) = @_;
290 3669         3967 my $previous = $self->{'_description'};
291 3669 100 100     6147 if( defined $value || ! defined $previous ) {
292 3510 100       4846 $value = $previous = '' unless defined $value;
293 3510         4467 $self->{'_description'} = $value;
294             }
295 3669         4000 return $previous;
296             }
297              
298             =head2 length
299              
300             Title : length
301             Usage : my $len = $hit->length
302             Function: Returns the length of the hit
303             Returns : integer
304             Args : [optional] integer to set the length
305              
306             =cut
307              
308             sub length {
309 1138     1138 1 1658 my ($self,$value) = @_;
310 1138         1527 my $previous = $self->{'_length'};
311 1138 100 100     2443 if( defined $value || ! defined $previous ) {
312 954 100       1333 $value = $previous = 0 unless defined $value;
313 954         1466 $self->{'_length'} = $value;
314             }
315 1138         1752 return $previous;
316             }
317              
318              
319             =head2 algorithm
320              
321             Title : algorithm
322             Usage : $alg = $hit->algorithm();
323             Function: Gets the algorithm specification that was used to obtain the hit
324             For BLAST, the algorithm denotes what type of sequence was aligned
325             against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
326             dna-prt, TBLASTN prt-translated dna, TBLASTX translated
327             dna-translated dna).
328             Returns : a scalar string
329             Args : [optional] scalar string to set the algorithm
330              
331             =cut
332              
333             sub algorithm {
334 4661     4661 1 6237 my ($self,$value) = @_;
335 4661         5892 my $previous = $self->{'_algorithm'};
336 4661 100 66     8788 if( defined $value || ! defined $previous ) {
337 3533 50       4339 $value = $previous = '' unless defined $value;
338 3533         4127 $self->{'_algorithm'} = $value;
339             }
340 4661         5554 return $previous;
341             }
342              
343             =head2 raw_score
344              
345             Title : raw_score
346             Usage : $score = $hit->raw_score();
347             Function: Gets the "raw score" generated by the algorithm. What
348             this score is exactly will vary from algorithm to algorithm,
349             returning undef if unavailable.
350             Returns : a scalar value
351             Args : [optional] scalar value to set the raw score
352              
353             =cut
354              
355             sub raw_score {
356 1336     1336 1 1740 my ($self,$value) = @_;
357 1336         1524 my $previous = $self->{'_score'};
358 1336 100       1919 if( defined $value ) {
    100          
359 1235         1747 $self->{'_score'} = $value;
360             } elsif ( ! defined $previous ) {
361             # Set the bits of the Hit to that of the top HSP.
362 53 50       143 unless( defined $self->{'_hsps'}->[0] ) {
363 0         0 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
364             "If using NCBI BLAST, check bits() instead");
365 0         0 return;
366             }
367             # use 'score' if available
368 53 100       162 if ( defined( ($self->hsps)[0]->score ) ) {
    50          
369 52         122 $previous = $self->{'_score'} = ($self->hsps)[0]->score;
370             }
371             # otherwise use 'bits'
372             elsif ( defined( ($self->hsps)[0]->bits ) ) {
373 1         3 $previous = $self->{'_score'} = ($self->hsps)[0]->bits;
374             }
375             }
376 1336         1636 return $previous;
377             }
378              
379             =head2 score
380              
381             Equivalent to L
382              
383             =cut
384              
385 14     14 1 81 sub score { shift->raw_score(@_); }
386              
387             =head2 significance
388              
389             Title : significance
390             Usage : $significance = $hit->significance();
391             Function: Used to obtain the E or P value of a hit, i.e. the probability that
392             this particular hit was obtained purely by random chance. If
393             information is not available (nor calculatable from other
394             information sources), return undef.
395             Returns : a scalar value or undef if unavailable
396             Args : [optional] scalar value to set the significance
397              
398             =cut
399              
400             sub significance {
401 3687     3687 1 4535 my ($self,$value) = @_;
402 3687         4039 my $previous = $self->{'_significance'};
403 3687 100       4815 if( defined $value ) {
    100          
404 3506         4854 $self->{'_significance'} = $value;
405             } elsif ( ! defined $previous ) {
406 4 50       16 unless( defined $self->{'_hsps'}->[0] ) {
407 0         0 $self->warn("No HSPs for this Hit (".$self->name.")");
408 0         0 return;
409             }
410             # Set the significance of the Hit to that of the top HSP.
411 4         15 $previous = $self->{'_significance'} = ($self->hsps)[0]->significance;
412             }
413              
414 3687         4986 return $previous;
415             }
416              
417             =head2 bits
418              
419             Usage : $hit_object->bits();
420             Purpose : Gets the bit score of the best HSP for the current hit.
421             Example : $bits = $hit_object->bits();
422             Returns : Integer or undef if bit score is not set
423             Argument : n/a
424             Comments : For BLAST1, the non-bit score is listed in the summary line.
425              
426             See Also : L
427              
428             =cut
429              
430             sub bits {
431 1696     1696 1 2085 my ($self,$value) = @_;
432 1696         2008 my $previous = $self->{'_bits'};
433 1696 100       2289 if( defined $value ) {
    100          
434 1546         3213 $self->{'_bits'} = $value;
435             } elsif ( ! defined $previous ) {
436             # Set the bits of the Hit to that of the top HSP.
437 90 50       214 unless( defined $self->{'_hsps'}->[0] ) {
438 0         0 $self->warn("No HSPs for this minimal Hit (".$self->name.")\n".
439             "If using WU-BLAST, check raw_score() instead");
440 0         0 return;
441             }
442 90         235 $previous = $self->{'_bits'} = ($self->hsps)[0]->bits;
443             }
444 1696         2379 return $previous;
445             }
446              
447             =head2 next_hsp
448              
449             Title : next_hsp
450             Usage : while( $hsp = $obj->next_hsp()) { ... }
451             Function : Returns the next available High Scoring Pair
452             Example :
453             Returns : Bio::Search::HSP::HSPI object or null if finished
454             Args : none
455              
456             =cut
457              
458             sub next_hsp {
459 347     347 1 14872 my $self = shift;
460 347 50       861 $self->{'_iterator'} = 0 unless defined $self->{'_iterator'};
461             return unless
462             defined($self->{'_hsps'})
463 347 100 66     981 && $self->{'_iterator'} <= scalar @{$self->{'_hsps'}};
  257         849  
464            
465 257         508 my $iterator = $self->{'_iterator'}++;
466 257   100     781 my $hsp = $self->{'_hsps'}->[$iterator] || return;
467 193 100       572 if (ref($hsp) eq 'HASH') {
468 142   33     359 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
469 142         231 $hsp = $factory->create_object(%{$hsp});
  142         1203  
470 142         933 $self->{'_hsps'}->[$iterator] = $hsp;
471 142         363 delete $self->{_hashes}->{$iterator};
472             }
473 193         547 return $hsp;
474             }
475              
476              
477             =head2 hsps
478              
479             Usage : $hit_object->hsps();
480             Purpose : Get a list containing all HSP objects.
481             : Get the numbers of HSPs for the current hit.
482             Example : @hsps = $hit_object->hsps();
483             : $num = $hit_object->hsps(); # alternatively, use num_hsps()
484             Returns : Array context : list of Bio::Search::HSP::BlastHSP.pm objects.
485             : Scalar context: integer (number of HSPs).
486             : (Equivalent to num_hsps()).
487             Argument : n/a. Relies on wantarray
488             Throws : Exception if the HSPs have not been collected.
489              
490             See Also : L, L
491              
492             =cut
493              
494             sub hsps {
495 3371     3371 1 4327 my $self = shift;
496 3371 100       3212 foreach my $i (keys %{$self->{_hashes} || {}}) {
  3371         9906  
497 586   33     1357 my $factory = $self->hsp_factory || $self->throw("Tried to get a HSP, but it was a hash ref and we have no hsp factory");
498 586         803 $self->{'_hsps'}->[$i] = $factory->create_object(%{$self->{'_hsps'}->[$i]});
  586         5080  
499 586         3903 delete $self->{_hashes}->{$i};
500             }
501            
502 3371 100       5961 return wantarray() ? @{$self->{'_hsps'} || []} : scalar(@{$self->{'_hsps'} || []});
  3369 50       11396  
  2 100       22  
503             }
504              
505             =head2 num_hsps
506              
507             Usage : $hit_object->num_hsps();
508             Purpose : Get the number of HSPs for the present hit.
509             Example : $nhsps = $hit_object->num_hsps();
510             Returns : Integer or '-' if HSPs have not been callected
511             Argument : n/a
512              
513             See Also : L
514              
515             =cut
516              
517             sub num_hsps {
518 139     139 1 204 my $self = shift;
519            
520 139 50       280 unless ($self->{'_hsps'}) {
521 0         0 return '-';
522             }
523            
524 139         154 return scalar(@{$self->{'_hsps'}});
  139         461  
525             }
526              
527             =head2 rewind
528              
529             Title : rewind
530             Usage : $hit->rewind;
531             Function: Allow one to reset the HSP iterator to the beginning
532             Since this is an in-memory implementation
533             Returns : none
534             Args : none
535              
536             =cut
537              
538             sub rewind{
539 102     102 1 112 my ($self) = @_;
540 102         175 $self->{'_iterator'} = 0;
541             }
542              
543             =head2 ambiguous_aln
544              
545             Usage : $ambig_code = $hit_object->ambiguous_aln();
546             Purpose : Sets/Gets ambiguity code data member.
547             Example : (see usage)
548             Returns : String = 'q', 's', 'qs', '-'
549             : 'q' = query sequence contains overlapping sub-sequences
550             : while sbjct does not.
551             : 's' = sbjct sequence contains overlapping sub-sequences
552             : while query does not.
553             : 'qs' = query and sbjct sequence contains overlapping sub-sequences
554             : relative to each other.
555             : '-' = query and sbjct sequence do not contains multiple domains
556             : relative to each other OR both contain the same distribution
557             : of similar domains.
558             Argument : n/a
559             Throws : n/a
560             Comment : Note: "sbjct" is synonymous with "hit"
561              
562             =cut
563              
564             sub ambiguous_aln {
565 9     9 1 22 my $self = shift;
566 9 50       20 if(@_) { $self->{'_ambiguous_aln'} = shift; }
  9         21  
567 9 50       28 $self->{'_ambiguous_aln'} || '-';
568             }
569              
570             =head2 overlap
571              
572             See documentation in L
573              
574             =cut
575              
576             sub overlap {
577 10     10 1 20 my $self = shift;
578 10 50       28 if(@_) { $self->{'_overlap'} = shift; }
  0         0  
579 10 50       29 defined $self->{'_overlap'} ? $self->{'_overlap'} : 0;
580             }
581              
582              
583             =head2 n
584              
585             Usage : $hit_object->n();
586             Purpose : Gets the N number for the current hit.
587             : This is the number of HSPs in the set which was ascribed
588             : the lowest P-value (listed on the description line).
589             : This number is not the same as the total number of HSPs.
590             : To get the total number of HSPs, use num_hsps().
591             Example : $n = $hit_object->n();
592             Returns : Integer
593             Argument : n/a
594             Throws : Exception if HSPs have not been set (BLAST2 reports).
595             Comments : Note that the N parameter is not reported in gapped BLAST2.
596             : Calling n() on such reports will result in a call to num_hsps().
597             : The num_hsps() method will count the actual number of
598             : HSPs in the alignment listing, which may exceed N in
599             : some cases.
600              
601             See Also : L
602              
603             =cut
604              
605             sub n {
606 0     0 1 0 my $self = shift;
607              
608             # The check for $self->{'_n'} is a remnant from the 'query' mode days
609             # in which the sbjct object would collect data from the description
610             # line only.
611              
612 0         0 my ($n);
613 0 0       0 if(not defined($self->{'_n'})) {
614 0 0       0 if( $self->hsp ) {
615 0         0 $n = $self->hsp->n;
616             }
617             } else {
618 0         0 $n = $self->{'_n'};
619             }
620 0   0     0 $n ||= $self->num_hsps;
621              
622 0         0 return $n;
623             }
624              
625             =head2 p
626              
627             Usage : $hit_object->p( [format] );
628             Purpose : Get the P-value for the best HSP of the given BLAST hit.
629             : (Note that P-values are not provided with NCBI Blast2 reports).
630             Example : $p = $sbjct->p;
631             : $p = $sbjct->p('exp'); # get exponent only.
632             : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
633             Returns : Float or scientific notation number (the raw P-value, DEFAULT).
634             : Integer if format == 'exp' (the magnitude of the base 10 exponent).
635             : 2-element list (float, int) if format == 'parts' and P-value
636             : is in scientific notation (See Comments).
637             Argument : format: string of 'raw' | 'exp' | 'parts'
638             : 'raw' returns value given in report. Default. (1.2e-34)
639             : 'exp' returns exponent value only (34)
640             : 'parts' returns the decimal and exponent as a
641             : 2-element list (1.2, -34) (See Comments).
642             Throws : Warns if no P-value is defined. Uses expect instead.
643             Comments : Using the 'parts' argument is not recommended since it will not
644             : work as expected if the P-value is not in scientific notation.
645             : That is, floats are not converted into sci notation before
646             : splitting into parts.
647              
648             See Also : L, L, L
649              
650             =cut
651              
652             sub p {
653             # Some duplication of logic for p(), expect() and signif() for the sake of performance.
654 3     3 1 129 my ($self, $fmt) = @_;
655              
656 3         9 my $val = $self->{'_p'};
657              
658             # $val can be zero.
659 3 100       11 if(!defined $val) {
660             # P-value not defined, must be a NCBI Blast2 report.
661             # Use expect instead.
662 2         10 $self->warn( "P-value not defined. Using significance() instead.");
663 2         9 $val = $self->significance();
664             }
665              
666 3 50 33     19 return $val if not $fmt or $fmt =~ /^raw/i;
667             ## Special formats: exponent-only or as list.
668 0 0       0 return &Bio::Search::SearchUtils::get_exponent($val) if $fmt =~ /^exp/i;
669 0 0       0 return (split (/eE/, $val)) if $fmt =~ /^parts/i;
670              
671             ## Default: return the raw P-value.
672 0         0 return $val;
673             }
674              
675             =head2 hsp
676              
677             Usage : $hit_object->hsp( [string] );
678             Purpose : Get a single HSPI object for the present HitI object.
679             Example : $hspObj = $hit_object->hsp; # same as 'best'
680             : $hspObj = $hit_object->hsp('best');
681             : $hspObj = $hit_object->hsp('worst');
682             Returns : Object reference for a Bio::Search::HSP::BlastHSP.pm object.
683             Argument : String (or no argument).
684             : No argument (default) = highest scoring HSP (same as 'best').
685             : 'best' or 'first' = highest scoring HSP.
686             : 'worst' or 'last' = lowest scoring HSP.
687             Throws : Exception if the HSPs have not been collected.
688             : Exception if an unrecognized argument is used.
689              
690             See Also : L, L()
691              
692             =cut
693              
694             sub hsp {
695 48     48 1 115 my( $self, $option ) = @_;
696 48   50     213 $option ||= 'best';
697            
698 48 50       115 if (not ref $self->{'_hsps'}) {
699 0         0 $self->throw("Can't get HSPs: data not collected.");
700             }
701              
702 48         96 my @hsps = $self->hsps;
703            
704 48 50       288 return $hsps[0] if $option =~ /best|first|1/i;
705 0 0       0 return $hsps[$#hsps] if $option =~ /worst|last/i;
706              
707 0         0 $self->throw("Can't get HSP for: $option\n" .
708             "Valid arguments: 'best', 'worst'");
709             }
710              
711             =head2 logical_length
712              
713             Usage : $hit_object->logical_length( [seq_type] );
714             : (mostly intended for internal use).
715             Purpose : Get the logical length of the hit sequence.
716             : This is necessary since the number of identical/conserved residues
717             : can be in terms of peptide sequence space, yet the query and/or hit
718             : sequence are in nucleotide space.
719             Example : $len = $hit_object->logical_length();
720             Returns : Integer
721             Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
722             ('sbjct' is synonymous with 'hit')
723             Throws : n/a
724             Comments :
725             : In the case of BLAST flavors:
726             : For TBLASTN reports, the length of the aligned portion of the
727             : nucleotide hit sequence is divided by 3; for BLASTX reports,
728             : the length of the aligned portion of the nucleotide query
729             : sequence is divided by 3. For TBLASTX reports, the length of
730             : both hit and query sequence are converted.
731             :
732             : This is important for functions like frac_aligned_query()
733             : which need to operate in amino acid coordinate space when dealing
734             : with [T]BLAST[NX] type reports.
735              
736             See Also : L, L, L
737              
738             =cut
739              
740             sub logical_length {
741 32     32 1 44 my $self = shift;
742 32   50     64 my $seqType = shift || 'query';
743 32 50       64 $seqType = 'sbjct' if $seqType eq 'hit';
744              
745 32         43 my ($length, $logical);
746 32         82 my $algo = $self->algorithm;
747              
748             # For the sbjct, return logical sbjct length
749 32 100       60 if( $seqType eq 'sbjct' ) {
750 13         29 $length = $self->length;
751             } else {
752             # Otherwise, return logical query length
753 19         43 $length = $self->query_length();
754             }
755              
756 32         70 $logical = Bio::Search::SearchUtils::logical_length($algo, $seqType, $length);
757              
758 32         73 return int($logical);
759             }
760              
761             =head2 length_aln
762              
763             Usage : $hit_object->length_aln( [seq_type] );
764             Purpose : Get the total length of the aligned region for query or sbjct seq.
765             : This number will include all HSPs
766             Example : $len = $hit_object->length_aln(); # default = query
767             : $lenAln = $hit_object->length_aln('query');
768             Returns : Integer
769             Argument : seq_Type = 'query' or 'hit' or 'sbjct' (Default = 'query')
770             ('sbjct' is synonymous with 'hit')
771             Throws : Exception if the argument is not recognized.
772             Comments : This method will report the logical length of the alignment,
773             : meaning that for TBLAST[NX] reports, the length is reported
774             : using amino acid coordinate space (i.e., nucleotides / 3).
775             :
776             : This method requires that all HSPs be tiled. If they have not
777             : already been tiled, they will be tiled first automatically..
778             : If you don't want the tiled data, iterate through each HSP
779             : calling length() on each (use hsps() to get all HSPs).
780              
781             See Also : L, L, L, L, L, L
782              
783             =cut
784              
785             sub length_aln {
786 448     448 1 653 my( $self, $seqType, $num ) = @_;
787              
788 448   50     621 $seqType ||= 'query';
789 448 100       620 $seqType = 'sbjct' if $seqType eq 'hit';
790              
791             # Setter:
792 448 100       621 if( defined $num) {
793 270         620 return $self->{'_length_aln_'.$seqType} = $num;
794             }
795              
796 178 50       282 unless ($self->{'_hsps'}) {
797             #return wantarray ? ('-','-') : '-';
798 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
799 0         0 return '-';
800             }
801              
802 178 50       230 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
803              
804 178         296 my $data = $self->{'_length_aln_'.$seqType};
805            
806             ## If we don't have data, figure out what went wrong.
807 178 50       241 if(!$data) {
808 0         0 $self->throw("Can't get length aln for sequence type \"$seqType\". " .
809             "Valid types are 'query', 'hit', 'sbjct' ('sbjct' = 'hit')");
810             }
811 178         537 return $data;
812             }
813              
814             =head2 gaps
815              
816             Usage : $hit_object->gaps( [seq_type] );
817             Purpose : Get the number of gaps in the aligned query, hit, or both sequences.
818             : Data is summed across all HSPs.
819             Example : $qgaps = $hit_object->gaps('query');
820             : $hgaps = $hit_object->gaps('hit');
821             : $tgaps = $hit_object->gaps(); # default = total (query + hit)
822             Returns : scalar context: integer
823             : array context without args: two-element list of integers
824             : (queryGaps, hitGaps)
825             : Array context can be forced by providing an argument of 'list' or 'array'.
826             :
827             : CAUTION: Calling this method within printf or sprintf is arrray context.
828             : So this function may not give you what you expect. For example:
829             : printf "Total gaps: %d", $hit->gaps();
830             : Actually returns a two-element array, so what gets printed
831             : is the number of gaps in the query, not the total
832             :
833             Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total' | 'list' (default = 'total')
834             ('sbjct' is synonymous with 'hit')
835             Throws : n/a
836             Comments : If you need data for each HSP, use hsps() and then interate
837             : through each HSP object.
838             : This method requires that all HSPs be tiled. If they have not
839             : already been tiled, they will be tiled first automatically..
840             : Not relying on wantarray since that will fail in situations
841             : such as printf "%d", $hit->gaps() in which you might expect to
842             : be printing the total gaps, but evaluates to array context.
843              
844             See Also : L
845              
846             =cut
847              
848             sub gaps {
849 116     116 1 197 my( $self, $seqType, $num ) = @_;
850              
851 116 0 33     160 $seqType ||= (wantarray ? 'list' : 'total');
852 116 100       188 $seqType = 'sbjct' if $seqType eq 'hit';
853              
854 116 50       208 unless ($self->{'_hsps'}) {
855 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
856 0 0       0 return wantarray ? ('-','-') : '-';
857             #return '-';
858             }
859              
860 116 50       166 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
861              
862 116         167 $seqType = lc($seqType);
863              
864 116 50       164 if( defined $num ) {
    0          
    0          
865 116 50 66     299 $self->throw("Can't set gaps for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
866              
867 116         293 return $self->{'_gaps_'.$seqType} = $num;
868             }
869             elsif($seqType =~ /list|array/i) {
870 0         0 return ($self->{'_gaps_query'}, $self->{'_gaps_sbjct'});
871             }
872             elsif($seqType eq 'total') {
873 0   0     0 return ($self->{'_gaps_query'} + $self->{'_gaps_sbjct'}) || 0;
874             } else {
875 0   0     0 return $self->{'_gaps_'.$seqType} || 0;
876             }
877             }
878              
879              
880             =head2 matches
881              
882             See documentation in L
883              
884             =cut
885              
886             sub matches {
887 124     124 1 228 my( $self, $arg1, $arg2) = @_;
888 124         141 my(@data,$data);
889              
890 124 50       260 unless ($self->{'_hsps'}) {
891 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
892 0 0       0 return wantarray ? ('-','-') : '-';
893             }
894              
895 124 100       220 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
896              
897 124 100       197 unless( $arg1 ) {
898 14         37 @data = ($self->{'_totalIdentical'}, $self->{'_totalConserved'});
899              
900 14         34 return @data;
901             } else {
902              
903 110 100       303 if( defined $arg2 ) {
    100          
904 46         77 $self->{'_totalIdentical'} = $arg1;
905 46         82 $self->{'_totalConserved'} = $arg2;
906 46         85 return ( $arg1, $arg2 );
907             }
908             elsif($arg1 =~ /^id/i) {
909 45         64 $data = $self->{'_totalIdentical'};
910             } else {
911 19         49 $data = $self->{'_totalConserved'};
912             }
913             #print STDERR "\nmatches(): id=$self->{'_totalIdentical'}, cons=$self->{'_totalConserved'}\n\n";
914 64         156 return $data;
915             }
916            
917             ## If we make it to here, it is likely the case that
918             ## the parser constructed a minimal hit object from the summary line only.
919             ## It either delibrately skipped parsing the alignment section,
920             ## or was not able to because it was absent (due to blast executable parameter
921             ## setting such as -b 0 (B=0 for WU-BLAST) )
922             #$self->throw("Can't get identical or conserved data: no data.");
923             }
924              
925              
926             =head2 start
927              
928             Usage : $sbjct->start( [seq_type] );
929             Purpose : Gets the start coordinate for the query, sbjct, or both sequences
930             : in the BlastHit object. If there is more than one HSP, the lowest start
931             : value of all HSPs is returned.
932             Example : $qbeg = $sbjct->start('query');
933             : $sbeg = $sbjct->start('hit');
934             : ($qbeg, $sbeg) = $sbjct->start();
935             Returns : scalar context: integer
936             : array context without args: list of two integers (queryStart, sbjctStart)
937             : Array context can be "induced" by providing an argument of 'list' or 'array'.
938             Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
939             ('sbjct' is synonymous with 'hit')
940             Throws : n/a
941              
942             See Also : L, L, L,
943             L
944              
945             =cut
946              
947             sub start {
948 24     24 1 57 my ($self, $seqType, $num) = @_;
949              
950 24 50       53 unless ($self->{'_hsps'}) {
951 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
952 0 0       0 return wantarray ? ('-','-') : '-';
953             }
954              
955 24 50 66     62 $seqType ||= (wantarray ? 'list' : 'query');
956 24 100       54 $seqType = 'sbjct' if $seqType eq 'hit';
957              
958 24 100       44 if( defined $num ) {
959 20         31 $seqType = "_\L$seqType\E";
960 20         48 return $self->{$seqType.'Start'} = $num;
961             }
962              
963             # If there is only one HSP, defer this call to the solitary HSP.
964 4 100       15 if($self->num_hsps == 1) {
965 3         11 return $self->hsp->start($seqType);
966             }
967             else {
968             # Tiling normally generates $self->{'_queryStart'} and
969             # $self->{'_sbjctStart'}, but is very slow. If we haven't tiled,
970             # find the answer quickly without tiling.
971 1 50       6 unless (defined $self->{'_queryStart'}) {
972 1         2 my $earliest_query_start;
973             my $earliest_sbjct_start;
974 1         4 foreach my $hsp ($self->hsps) {
975 6         18 my $this_query_start = $hsp->start('query');
976 6 100 100     22 if (! defined $earliest_query_start || $this_query_start < $earliest_query_start) {
977 3         4 $earliest_query_start = $this_query_start;
978             }
979            
980 6         12 my $this_sbjct_start = $hsp->start('sbjct');
981 6 100 100     24 if (! defined $earliest_sbjct_start || $this_sbjct_start < $earliest_sbjct_start) {
982 3         8 $earliest_sbjct_start = $this_sbjct_start;
983             }
984             }
985 1         4 $self->{'_queryStart'} = $earliest_query_start;
986 1         2 $self->{'_sbjctStart'} = $earliest_sbjct_start;
987             }
988            
989            
990 1 50       28 if ($seqType =~ /list|array/i) {
991 0         0 return ($self->{'_queryStart'}, $self->{'_sbjctStart'});
992             } else {
993             ## Sensitive to member name changes.
994 1         4 $seqType = "_\L$seqType\E";
995 1         9 return $self->{$seqType.'Start'};
996             }
997             }
998             }
999              
1000              
1001             =head2 end
1002              
1003             Usage : $sbjct->end( [seq_type] );
1004             Purpose : Gets the end coordinate for the query, sbjct, or both sequences
1005             : in the BlastHit object. If there is more than one HSP,
1006             the largest end
1007             : value of all HSPs is returned.
1008             Example : $qend = $sbjct->end('query');
1009             : $send = $sbjct->end('hit');
1010             : ($qend, $send) = $sbjct->end();
1011             Returns : scalar context: integer
1012             : array context without args: list of two integers
1013             : (queryEnd, sbjctEnd)
1014             : Array context can be "induced" by providing an argument
1015             : of 'list' or 'array'.
1016             Argument : In scalar context: seq_type = 'query' or 'sbjct'
1017             : (case insensitive). If not supplied, 'query' is used.
1018             Throws : n/a
1019              
1020             See Also : L, L, L
1021              
1022             =cut
1023              
1024             sub end {
1025 24     24 1 52 my ($self, $seqType, $num) = @_;
1026              
1027 24 50       56 unless ($self->{'_hsps'}) {
1028 0 0       0 return wantarray ? ('-','-') : '-';
1029             }
1030              
1031 24 50 66     55 $seqType ||= (wantarray ? 'list' : 'query');
1032 24 100       50 $seqType = 'sbjct' if $seqType eq 'hit';
1033              
1034 24 100       49 if( defined $num ) {
1035 20         28 $seqType = "_\L$seqType\E";
1036 20         46 return $self->{$seqType.'Stop'} = $num;
1037             }
1038              
1039             # If there is only one HSP, defer this call to the solitary HSP.
1040 4 100       10 if($self->num_hsps == 1) {
1041 3         9 return $self->hsp->end($seqType);
1042             }
1043             else {
1044             # Tiling normally generates $self->{'_queryStop'} and
1045             # $self->{'_sbjctStop'}, but is very slow. If we haven't tiled,
1046             # find the answer quickly without tiling.
1047 1 50       5 unless (defined $self->{'_queryStop'}) {
1048 1         2 my $latest_query_end;
1049             my $latest_sbjct_end;
1050 1         4 foreach my $hsp ($self->hsps) {
1051 6         13 my $this_query_end = $hsp->end('query');
1052 6 100 100     21 if (! defined $latest_query_end || $this_query_end > $latest_query_end) {
1053 4         5 $latest_query_end = $this_query_end;
1054             }
1055            
1056 6         8 my $this_sbjct_end = $hsp->end('sbjct');
1057 6 100 100     20 if (! defined $latest_sbjct_end || $this_sbjct_end > $latest_sbjct_end) {
1058 2         3 $latest_sbjct_end = $this_sbjct_end;
1059             }
1060             }
1061 1         6 $self->{'_queryStop'} = $latest_query_end;
1062 1         2 $self->{'_sbjctStop'} = $latest_sbjct_end;
1063             }
1064            
1065            
1066 1 50       5 if($seqType =~ /list|array/i) {
1067 0         0 return ($self->{'_queryStop'}, $self->{'_sbjctStop'});
1068             } else {
1069             ## Sensitive to member name changes.
1070 1         3 $seqType = "_\L$seqType\E";
1071 1         6 return $self->{$seqType.'Stop'};
1072             }
1073             }
1074             }
1075              
1076             =head2 range
1077              
1078             Usage : $sbjct->range( [seq_type] );
1079             Purpose : Gets the (start, end) coordinates for the query or sbjct sequence
1080             : in the HSP alignment.
1081             Example : ($qbeg, $qend) = $sbjct->range('query');
1082             : ($sbeg, $send) = $sbjct->range('hit');
1083             Returns : Two-element array of integers
1084             Argument : seq_type = string, 'query' or 'hit' or 'sbjct' (default = 'query')
1085             ('sbjct' is synonymous with 'hit')
1086             Throws : n/a
1087              
1088             See Also : L, L
1089              
1090             =cut
1091              
1092             sub range {
1093 0     0 1 0 my ($self, $seqType) = @_;
1094 0   0     0 $seqType ||= 'query';
1095 0 0       0 $seqType = 'sbjct' if $seqType eq 'hit';
1096 0         0 return ($self->start($seqType), $self->end($seqType));
1097             }
1098              
1099              
1100             =head2 frac_identical
1101              
1102             Usage : $hit_object->frac_identical( [seq_type] );
1103             Purpose : Get the overall fraction of identical positions across all HSPs.
1104             : The number refers to only the aligned regions and does not
1105             : account for unaligned regions in between the HSPs, if any.
1106             Example : $frac_iden = $hit_object->frac_identical('query');
1107             Returns : Float (2-decimal precision, e.g., 0.75).
1108             Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1109             : default = 'query' (but see comments below).
1110             : ('sbjct' is synonymous with 'hit')
1111             Throws : n/a
1112             Comments :
1113             : To compute the fraction identical, the logical length of the
1114             : aligned portion of the sequence is used, meaning that
1115             : in the case of BLAST flavors, for TBLASTN reports, the length of
1116             : the aligned portion of the
1117             : nucleotide hit sequence is divided by 3; for BLASTX reports,
1118             : the length of the aligned portion of the nucleotide query
1119             : sequence is divided by 3. For TBLASTX reports, the length of
1120             : both hit and query sequence are converted.
1121             : This is necessary since the number of identical residues is
1122             : in terms of peptide sequence space.
1123             :
1124             : Different versions of Blast report different values for the total
1125             : length of the alignment. This is the number reported in the
1126             : denominators in the stats section:
1127             : "Identical = 34/120 Positives = 67/120".
1128             : NCBI BLAST uses the total length of the alignment (with gaps)
1129             : WU-BLAST uses the length of the query sequence (without gaps).
1130             :
1131             : Therefore, when called with an argument of 'total',
1132             : this method will report different values depending on the
1133             : version of BLAST used. Total does NOT take into account HSP
1134             : tiling, so it should not be used.
1135             :
1136             : To get the fraction identical among only the aligned residues,
1137             : ignoring the gaps, call this method without an argument or
1138             : with an argument of 'query' or 'hit'.
1139             :
1140             : If you need data for each HSP, use hsps() and then iterate
1141             : through the HSP objects.
1142             : This method requires that all HSPs be tiled. If they have not
1143             : already been tiled, they will be tiled first automatically.
1144              
1145             See Also : L, L, L, L
1146              
1147             =cut
1148              
1149             sub frac_identical {
1150 41     41 1 118 my ($self, $seqType) = @_;
1151 41   100     94 $seqType ||= 'query';
1152 41 100       80 $seqType = 'sbjct' if $seqType eq 'hit';
1153              
1154             ## Sensitive to member name format.
1155 41         70 $seqType = lc($seqType);
1156              
1157 41 50       85 unless ($self->{'_hsps'}) {
1158 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1159             #return wantarray ? ('-','-') : '-';
1160 0         0 return '-';
1161             }
1162              
1163 41 100       86 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1164              
1165 41         106 my $ident = $self->matches('id');
1166 41         91 my $total = $self->length_aln($seqType);
1167 41         91 my $ratio = $ident / $total;
1168 41         299 my $ratio_rounded = sprintf( "%.3f", $ratio);
1169              
1170             # Round down iff normal rounding yields 1 (just like blast)
1171 41 50 66     159 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1172 41         225 return $ratio_rounded;
1173             }
1174              
1175              
1176             =head2 frac_conserved
1177              
1178             Usage : $hit_object->frac_conserved( [seq_type] );
1179             Purpose : Get the overall fraction of conserved positions across all HSPs.
1180             : The number refers to only the aligned regions and does not
1181             : account for unaligned regions in between the HSPs, if any.
1182             Example : $frac_cons = $hit_object->frac_conserved('hit');
1183             Returns : Float (2-decimal precision, e.g., 0.75).
1184             Argument : seq_type: 'query' | 'hit' or 'sbjct' | 'total'
1185             : default = 'query' (but see comments below).
1186             : ('sbjct' is synonymous with 'hit')
1187             Throws : n/a
1188             Comments :
1189             : To compute the fraction conserved, the logical length of the
1190             : aligned portion of the sequence is used, meaning that
1191             : in the case of BLAST flavors, for TBLASTN reports, the length of
1192             : the aligned portion of the
1193             : nucleotide hit sequence is divided by 3; for BLASTX reports,
1194             : the length of the aligned portion of the nucleotide query
1195             : sequence is divided by 3. For TBLASTX reports, the length of
1196             : both hit and query sequence are converted.
1197             : This is necessary since the number of conserved residues is
1198             : in terms of peptide sequence space.
1199             :
1200             : Different versions of Blast report different values for the total
1201             : length of the alignment. This is the number reported in the
1202             : denominators in the stats section:
1203             : "Positives = 34/120 Positives = 67/120".
1204             : NCBI BLAST uses the total length of the alignment (with gaps)
1205             : WU-BLAST uses the length of the query sequence (without gaps).
1206             :
1207             : Therefore, when called with an argument of 'total',
1208             : this method will report different values depending on the
1209             : version of BLAST used. Total does NOT take into account HSP
1210             : tiling, so it should not be used.
1211             :
1212             : To get the fraction conserved among only the aligned residues,
1213             : ignoring the gaps, call this method without an argument or
1214             : with an argument of 'query' or 'hit'.
1215             :
1216             : If you need data for each HSP, use hsps() and then interate
1217             : through the HSP objects.
1218             : This method requires that all HSPs be tiled. If they have not
1219             : already been tiled, they will be tiled first automatically.
1220              
1221             See Also : L, L, L
1222              
1223             =cut
1224              
1225             sub frac_conserved {
1226 15     15 1 34 my ($self, $seqType) = @_;
1227 15   100     36 $seqType ||= 'query';
1228 15 100       32 $seqType = 'sbjct' if $seqType eq 'hit';
1229              
1230             ## Sensitive to member name format.
1231 15         28 $seqType = lc($seqType);
1232              
1233 15 50       32 unless ($self->{'_hsps'}) {
1234 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1235             #return wantarray ? ('-','-') : '-';
1236 0         0 return '-';
1237             }
1238              
1239 15 50       33 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1240              
1241 15         32 my $consv = $self->matches('cons');
1242 15         33 my $total = $self->length_aln($seqType);
1243 15         30 my $ratio = $consv / $total;
1244 15         84 my $ratio_rounded = sprintf( "%.3f", $ratio);
1245              
1246             # Round down iff normal rounding yields 1 (just like blast)
1247 15 50 33     57 $ratio_rounded = 0.999 if (($ratio_rounded == 1) && ($ratio < 1));
1248 15         91 return $ratio_rounded;
1249             }
1250              
1251              
1252              
1253              
1254             =head2 frac_aligned_query
1255              
1256             Usage : $hit_object->frac_aligned_query();
1257             Purpose : Get the fraction of the query sequence which has been aligned
1258             : across all HSPs (not including intervals between non-overlapping
1259             : HSPs).
1260             Example : $frac_alnq = $hit_object->frac_aligned_query();
1261             Returns : Float (2-decimal precision, e.g., 0.75),
1262             : or undef if query length is unknown to avoid division by 0.
1263             Argument : n/a
1264             Throws : n/a
1265             Comments : If you need data for each HSP, use hsps() and then interate
1266             : through the HSP objects.
1267             : This method requires that all HSPs be tiled. If they have not
1268             : already been tiled, they will be tiled first automatically.
1269              
1270             See Also : L, L, L, L
1271              
1272             =cut
1273              
1274             sub frac_aligned_query {
1275 19     19 1 1444 my $self = shift;
1276              
1277 19 50       63 unless ($self->{'_hsps'}) {
1278 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1279             #return wantarray ? ('-','-') : '-';
1280 0         0 return '-';
1281             }
1282              
1283 19 100       53 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1284              
1285 19         97 my $qry_len = $self->logical_length('query');
1286 19 100       53 return undef if $qry_len == 0; # Avoid division by 0 crash
1287 16         41 sprintf( "%.2f", $self->length_aln('query') / $qry_len);
1288             }
1289              
1290              
1291              
1292             =head2 frac_aligned_hit
1293              
1294             Usage : $hit_object->frac_aligned_hit();
1295             Purpose : Get the fraction of the hit (sbjct) sequence which has been aligned
1296             : across all HSPs (not including intervals between non-overlapping
1297             : HSPs).
1298             Example : $frac_alnq = $hit_object->frac_aligned_hit();
1299             Returns : Float (2-decimal precision, e.g., 0.75),
1300             : or undef if hit length is unknown to avoid division by 0.
1301             Argument : n/a
1302             Throws : n/a
1303             Comments : If you need data for each HSP, use hsps() and then interate
1304             : through the HSP objects.
1305             : This method requires that all HSPs be tiled. If they have not
1306             : already been tiled, they will be tiled first automatically.
1307              
1308             See Also : L, L, , L, L, L
1309              
1310             =cut
1311              
1312             sub frac_aligned_hit {
1313 13     13 1 36 my $self = shift;
1314              
1315 13 50       46 unless ($self->{'_hsps'}) {
1316 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1317             #return wantarray ? ('-','-') : '-';
1318 0         0 return '-';
1319             }
1320              
1321 13 50       34 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1322              
1323 13         41 my $sbjct_len = $self->logical_length('sbjct');
1324 13 100       39 return undef if $sbjct_len == 0; # Avoid division by 0 crash
1325 10         27 sprintf( "%.2f", $self->length_aln('sbjct') / $sbjct_len);
1326             }
1327              
1328              
1329             ## These methods are being maintained for backward compatibility.
1330              
1331             =head2 frac_aligned_sbjct
1332              
1333             Same as L
1334              
1335             =cut
1336              
1337             *frac_aligned_sbjct = \&frac_aligned_hit;
1338              
1339             =head2 num_unaligned_sbjct
1340              
1341             Same as L
1342              
1343             =cut
1344              
1345             *num_unaligned_sbjct = \&num_unaligned_hit;
1346              
1347              
1348             =head2 num_unaligned_hit
1349              
1350             Usage : $hit_object->num_unaligned_hit();
1351             Purpose : Get the number of the unaligned residues in the hit sequence.
1352             : Sums across all all HSPs.
1353             Example : $num_unaln = $hit_object->num_unaligned_hit();
1354             Returns : Integer
1355             Argument : n/a
1356             Throws : n/a
1357             Comments : See notes regarding logical lengths in the comments for frac_aligned_hit().
1358             : They apply here as well.
1359             : If you need data for each HSP, use hsps() and then interate
1360             : through the HSP objects.
1361             : This method requires that all HSPs be tiled. If they have not
1362             : already been tiled, they will be tiled first automatically..
1363              
1364             See Also : L, L, L
1365              
1366             =cut
1367              
1368             sub num_unaligned_hit {
1369 0     0 1 0 my $self = shift;
1370              
1371 0 0       0 unless ($self->{'_hsps'}) {
1372 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1373             #return wantarray ? ('-','-') : '-';
1374 0         0 return '-';
1375             }
1376              
1377 0 0       0 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1378              
1379 0         0 my $num = $self->logical_length('sbjct') - $self->length_aln('sbjct');
1380 0 0       0 ($num < 0 ? 0 : $num );
1381             }
1382              
1383              
1384             =head2 num_unaligned_query
1385              
1386             Usage : $hit_object->num_unaligned_query();
1387             Purpose : Get the number of the unaligned residues in the query sequence.
1388             : Sums across all all HSPs.
1389             Example : $num_unaln = $hit_object->num_unaligned_query();
1390             Returns : Integer
1391             Argument : n/a
1392             Throws : n/a
1393             Comments : See notes regarding logical lengths in the comments for frac_aligned_query().
1394             : They apply here as well.
1395             : If you need data for each HSP, use hsps() and then interate
1396             : through the HSP objects.
1397             : This method requires that all HSPs be tiled. If they have not
1398             : already been tiled, they will be tiled first automatically..
1399              
1400             See Also : L, L, L
1401              
1402             =cut
1403              
1404             sub num_unaligned_query {
1405 0     0 1 0 my $self = shift;
1406              
1407 0 0       0 unless ($self->{'_hsps'}) {
1408 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1409             #return wantarray ? ('-','-') : '-';
1410 0         0 return '-';
1411             }
1412              
1413 0 0       0 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1414              
1415 0         0 my $num = $self->logical_length('query') - $self->length_aln('query');
1416 0 0       0 ($num < 0 ? 0 : $num );
1417             }
1418              
1419              
1420              
1421             =head2 seq_inds
1422              
1423             Usage : $hit->seq_inds( seq_type, class, collapse );
1424             Purpose : Get a list of residue positions (indices) across all HSPs
1425             : for identical or conserved residues in the query or sbjct sequence.
1426             Example : @s_ind = $hit->seq_inds('query', 'identical');
1427             : @h_ind = $hit->seq_inds('hit', 'conserved');
1428             : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
1429             Returns : Array of integers
1430             : May include ranges if collapse is non-zero.
1431             Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
1432             : ('sbjct' is synonymous with 'hit')
1433             : [1] class = 'identical' or 'conserved' (default = 'identical')
1434             : (can be shortened to 'id' or 'cons')
1435             : (actually, anything not 'id' will evaluate to 'conserved').
1436             : [2] collapse = boolean, if non-zero, consecutive positions are merged
1437             : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
1438             : collapses to "1-5 7 9-11". This is useful for
1439             : consolidating long lists. Default = no collapse.
1440             Throws : n/a.
1441              
1442             See Also : L
1443              
1444             =cut
1445              
1446             sub seq_inds {
1447 0     0 1 0 my ($self, $seqType, $class, $collapse) = @_;
1448              
1449 0   0     0 $seqType ||= 'query';
1450 0   0     0 $class ||= 'identical';
1451 0   0     0 $collapse ||= 0;
1452              
1453 0 0       0 $seqType = 'sbjct' if $seqType eq 'hit';
1454              
1455 0         0 my (@inds, $hsp);
1456 0         0 foreach $hsp ($self->hsps) {
1457             # This will merge data for all HSPs together.
1458 0         0 push @inds, $hsp->seq_inds($seqType, $class);
1459             }
1460            
1461             # Need to remove duplicates and sort the merged positions.
1462 0 0       0 if(@inds) {
1463 0         0 my %tmp = map { $_, 1 } @inds;
  0         0  
1464 0         0 @inds = sort {$a <=> $b} keys %tmp;
  0         0  
1465             }
1466              
1467 0 0       0 $collapse ? &Bio::Search::SearchUtils::collapse_nums(@inds) : @inds;
1468             }
1469              
1470              
1471             =head2 strand
1472              
1473             See documentation in L
1474              
1475             =cut
1476              
1477             sub strand {
1478 20     20 1 37 my ($self, $seqType, $strnd) = @_;
1479              
1480 20 50       44 unless ($self->{'_hsps'}) {
1481 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1482 0 0       0 return wantarray ? ('-','-') : '-';
1483             #return '-';
1484             }
1485              
1486 20 50       30 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1487              
1488 20 0 33     38 $seqType ||= (wantarray ? 'list' : 'query');
1489 20 100       50 $seqType = 'sbjct' if $seqType eq 'hit';
1490              
1491 20         27 $seqType = lc($seqType);
1492              
1493 20 50       33 if( defined $strnd ) {
1494 20 50 66     63 $self->throw("Can't set strand for seqType '$seqType'. Must be 'query' or 'hit'\n") unless ($seqType eq 'sbjct' or $seqType eq 'query');
1495              
1496 20         55 return $self->{'_strand_'.$seqType} = $strnd;
1497             }
1498              
1499 0         0 my ($qstr, $hstr);
1500             # If there is only one HSP, defer this call to the solitary HSP.
1501 0 0       0 if($self->num_hsps == 1) {
    0          
1502 0         0 return $self->hsp->strand($seqType);
1503             }
1504             elsif( defined $self->{'_strand_query'}) {
1505             # Get the data computed during hsp tiling.
1506 0         0 $qstr = $self->{'_strand_query'};
1507 0         0 $hstr = $self->{'_strand_sbjct'}
1508             }
1509             else {
1510             # otherwise, iterate through all HSPs collecting strand info.
1511             # This will return the string "-1/1" if there are HSPs on different strands.
1512             # NOTE: This was the pre-10/21/02 procedure which will no longer be used,
1513             # (unless the above elsif{} is commented out).
1514 0         0 my (%qstr, %hstr);
1515 0         0 foreach my $hsp( $self->hsps ) {
1516 0         0 my ( $q, $h ) = $hsp->strand();
1517 0         0 $qstr{ $q }++;
1518 0         0 $hstr{ $h }++;
1519             }
1520 0         0 $qstr = join( '/', sort keys %qstr);
1521 0         0 $hstr = join( '/', sort keys %hstr);
1522             }
1523              
1524 0 0       0 if($seqType =~ /list|array/i) {
    0          
1525 0         0 return ($qstr, $hstr);
1526             } elsif( $seqType eq 'query' ) {
1527 0         0 return $qstr;
1528             } else {
1529 0         0 return $hstr;
1530             }
1531             }
1532              
1533             =head2 frame
1534              
1535             See documentation in L
1536              
1537             =cut
1538              
1539             sub frame {
1540 10     10 1 24 my( $self, $frm ) = @_;
1541              
1542 10 50       30 unless ($self->{'_hsps'}) {
1543 0         0 Bio::Search::SearchUtils::_warn_about_no_hsps($self);
1544             #return wantarray ? ('-','-') : '-';
1545 0         0 return '-';
1546             }
1547              
1548 10 50       25 Bio::Search::SearchUtils::tile_hsps($self) unless $self->tiled_hsps;
1549              
1550 10 50       28 if( defined $frm ) {
1551 10         25 return $self->{'_frame'} = $frm;
1552             }
1553              
1554             # The check for $self->{'_frame'} is a remnant from the 'query' mode days
1555             # in which the sbjct object would collect data from the description line only.
1556              
1557 0         0 my ($frame);
1558 0 0       0 if(not defined($self->{'_frame'})) {
1559 0         0 $frame = $self->hsp->frame('hit');
1560             } else {
1561 0         0 $frame = $self->{'_frame'};
1562             }
1563 0         0 return $frame;
1564             }
1565              
1566             =head2 rank
1567              
1568             Title : rank
1569             Usage : $obj->rank($newval)
1570             Function: Get/Set the rank of this Hit in the Query search list
1571             i.e. this is the Nth hit for a specific query
1572             Returns : value of rank
1573             Args : newvalue (optional)
1574              
1575              
1576             =cut
1577              
1578             sub rank {
1579 3523     3523 1 3661 my $self = shift;
1580 3523 100       8003 return $self->{'_rank'} = shift if @_;
1581 3   50     18 return $self->{'_rank'} || 1;
1582             }
1583              
1584             =head2 locus
1585              
1586             Title : locus
1587             Usage : $locus = $hit->locus();
1588             Function: Retrieve the locus (if available) for the hit
1589             Returns : a scalar string (empty string if not set)
1590             Args : none
1591              
1592             =cut
1593              
1594             sub locus {
1595 1     1 1 4 my ($self,$value) = @_;
1596 1         2 my $previous = $self->{'_locus'};
1597 1 50 33     8 if( defined $value || ! defined $previous ) {
1598 1 50       3 unless (defined $value) {
1599 1 50       8 if ($self->{'_name'} =~/(gb|emb|dbj|ref)\|(.*)\|(.*)/) {
1600 1         3 $value = $previous = $3;
1601             } else {
1602 0         0 $value = $previous = '';
1603             }
1604             }
1605 1         3 $self->{'_locus'} = $value;
1606             }
1607 1         4 return $previous;
1608             }
1609              
1610             =head2 each_accession_number
1611              
1612             Title : each_accession_number
1613             Usage : @each_accession_number = $hit->each_accession_number();
1614             Function: Get each accession number listed in the description of the hit.
1615             If there are no alternatives, then only the primary accession will
1616             be given
1617             Returns : list of all accession numbers in the description
1618             Args : none
1619              
1620             =cut
1621              
1622             sub each_accession_number {
1623 1     1 1 3 my ($self,$value) = @_;
1624 1         3 my $desc = $self->{'_description'};
1625             #put primary accnum on the list
1626 1         1 my @accnums;
1627 1         3 push (@accnums,$self->{'_accession'});
1628 1 50       3 if( defined $desc ) {
1629 1         9 while ($desc =~ /(\b\S+\|\S*\|\S*\s?)/g) {
1630 2         5 my $id = $1;
1631 2         3 my ($acc, $version);
1632 2 50       9 if ($id =~ /(gb|emb|dbj|sp|pdb|bbs|ref|tp[gde])\|(.*)\|(.*)/) {
    0          
    0          
    0          
1633 2         6 ($acc, $version) = split /\./, $2;
1634             } elsif ($id =~ /(pir|prf|pat|gnl)\|(.*)\|(.*)/) {
1635 0         0 ($acc, $version) = split /\./, $3;
1636             } elsif( $id =~ /(gim|gi|bbm|bbs|lcl)\|(\d*)/) {
1637 0         0 $acc = $id;
1638             } elsif( $id =~ /(oth)\|(.*)\|(.*)\|(.*)/ ) { # discontinued...
1639 0         0 ($acc,$version) = ($2);
1640             } else {
1641             #punt, not matching the db's at ftp://ftp.ncbi.nih.gov/blast/db/README
1642             #Database Name Identifier Syntax
1643             #============================ ========================
1644             #GenBank gb|accession|locus
1645             #EMBL Data Library emb|accession|locus
1646             #DDBJ, DNA Database of Japan dbj|accession|locus
1647             #NBRF PIR pir||entry
1648             #Protein Research Foundation prf||name
1649             #SWISS-PROT sp|accession|entry name
1650             #Brookhaven Protein Data Bank pdb|entry|chain
1651             #Patents pat|country|number
1652             #GenInfo Backbone Id bbs|number
1653             #General database identifier gnl|database|identifier
1654             #NCBI Reference Sequence ref|accession|locus
1655             #Local Sequence identifier lcl|identifier
1656 0         0 $acc=$id;
1657             }
1658 2         8 push(@accnums, $acc);
1659             }
1660             }
1661 1         5 return @accnums;
1662             }
1663              
1664             =head2 tiled_hsps
1665              
1666             See documentation in L
1667              
1668             =cut
1669              
1670             sub tiled_hsps {
1671 584     584 1 593 my $self = shift;
1672 584 100       938 return $self->{'_tiled_hsps'} = shift if @_;
1673 536         1031 return $self->{'_tiled_hsps'};
1674             }
1675              
1676             =head2 query_length
1677              
1678             Title : query_length
1679             Usage : $obj->query_length($newval)
1680             Function: Get/Set the query_length
1681             Returns : value of query_length (a scalar)
1682             Args : on set, new value (a scalar or undef, optional)
1683              
1684              
1685             =cut
1686              
1687             sub query_length {
1688 3565     3565 1 4604 my ($self,$value) = @_;
1689 3565         4532 my $previous = $self->{'_query_length'};
1690 3565 100 100     6199 if( defined $value || ! defined $previous ) {
1691 3547 100       4743 $value = $previous = 0 unless defined $value;
1692 3547         4995 $self->{'_query_length'} = $value;
1693             }
1694 3565         3875 return $previous;
1695             }
1696              
1697             =head2 ncbi_gi
1698              
1699             Title : ncbi_gi
1700             Usage : $acc = $hit->ncbi_gi();
1701             Function: Retrieve the NCBI Unique ID (aka the GI #),
1702             if available, for the hit
1703             Returns : a scalar string (empty string if not set)
1704             Args : none
1705             Note : As of Sept. 2016 NCBI records will no longer have a
1706             GI; this attributue will remain in place for older
1707             records
1708            
1709             =cut
1710              
1711             sub ncbi_gi {
1712 10     10 1 29 my ($self,$value) = @_;
1713 10 50       35 if( defined $value ) {
1714 0         0 $self->{'_ncbi_gi'} = $value;
1715             } else {
1716 10 50       28 $self->{'_ncbi_gi'} = $self->name =~ m{^gi\|(\d+)} ? $1 : '';
1717             }
1718 10         45 return $self->{'_ncbi_gi'};
1719             }
1720              
1721              
1722             # sort method for HSPs
1723              
1724             =head2 sort_hits
1725              
1726             Title : sort_hsps
1727             Usage : $result->sort_hsps(\&sort_function)
1728             Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
1729             by descending score.
1730             Returns : n/a
1731             Args : A coderef for the sort function. See the documentation on the Perl sort()
1732             function for guidelines on writing sort functions.
1733             Note : To access the special variables $a and $b used by the Perl sort() function
1734             the user function must access Bio::Search::Hit::HitI namespace.
1735             For example, use :
1736             $hit->sort_hsps( sub{$Bio::Search::Result::HitI::a->length <=>
1737             $Bio::Search::Result::HitI::b->length});
1738             NOT $hit->sort_hsps($a->length <=> $b->length);
1739              
1740             =cut
1741              
1742             sub sort_hsps {
1743 0     0 1   my ($self, $coderef) = @_;
1744 0           my @sorted_hsps;
1745              
1746 0 0         if ($coderef) {
1747 0 0         $self->throw('sort_hsps requires a sort function passed as a subroutine reference')
1748             unless (ref($coderef) eq 'CODE');
1749             }
1750             else {
1751 0           $coderef = \&_default_sort_hsps;
1752             # throw a warning?
1753             }
1754              
1755 0           my @hsps = $self->hsps();
1756 0           eval {@sorted_hsps = sort $coderef @hsps };
  0            
1757              
1758 0 0         if ($@) {
1759 0           $self->throw("Unable to sort hsps: $@");
1760             }
1761             else {
1762 0           $self->{'_hsps'} = \@sorted_hsps;
1763 0           1;
1764             }
1765             }
1766              
1767             =head2 iteration
1768              
1769             Usage : $hit->iteration( $iteration_num );
1770             Purpose : Gets the iteration number in which the Hit was found.
1771             Example : $iteration_num = $sbjct->iteration();
1772             Returns : Integer greater than or equal to 1
1773             Non-PSI-BLAST reports may report iteration as 1, but this number
1774             is only meaningful for PSI-BLAST reports.
1775             Argument : iteration_num (optional, used when setting only)
1776             Throws : none
1777              
1778             See Also : L
1779              
1780             =cut
1781              
1782             sub iteration{
1783 0     0 1   my ($self,$value) = @_;
1784 0 0         if( defined $value) {
1785 0           $self->{'_psiblast_iteration'} = $value;
1786             }
1787 0           return $self->{'_psiblast_iteration'};
1788             }
1789              
1790             =head2 found_again
1791              
1792             Title : found_again
1793             Usage : $hit->found_again;
1794             $hit->found_again(1);
1795             Purpose : Gets a boolean indicator whether or not the hit has
1796             been found in a previous iteration.
1797             This is only applicable to PSI-BLAST reports.
1798              
1799             This method indicates if the hit was reported in the
1800             "Sequences used in model and found again" section of the
1801             PSI-BLAST report or if it was reported in the
1802             "Sequences not found previously or not previously below threshold"
1803             section of the PSI-BLAST report. Only for hits in iteration > 1.
1804              
1805             Example : if( $hit->found_again()) { ... };
1806             Returns : Boolean, true (1) if the hit has been found in a
1807             previous PSI-BLAST iteration.
1808             Returns false (0 or undef) for hits that have not occurred in a
1809             previous PSI-BLAST iteration.
1810             Argument : Boolean (1 or 0). Only used for setting.
1811             Throws : none
1812              
1813             See Also : L
1814              
1815             =cut
1816              
1817             sub found_again {
1818 0     0 1   my $self = shift;
1819 0 0         return $self->{'_found_again'} = shift if @_;
1820 0           return $self->{'_found_again'};
1821             }
1822              
1823             1;