File Coverage

Bio/Search/Hit/HitI.pm
Criterion Covered Total %
statement 7 60 11.6
branch 0 6 0.0
condition 0 6 0.0
subroutine 3 32 9.3
pod 27 29 93.1
total 37 133 27.8


line stmt bran cond sub pod time code
1             #-----------------------------------------------------------------
2             #
3             # BioPerl module Bio::Search::Hit::HitI
4             #
5             # Please direct questions and support issues to <bioperl-l@bioperl.org>
6             #
7             # Cared for by Steve Chervitz
8             #
9             # Originally created by Aaron Mackey
10             #
11             # You may distribute this module under the same terms as perl itself
12             #-----------------------------------------------------------------
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::Search::Hit::HitI - Interface for a hit in a similarity search result
19              
20             =head1 SYNOPSIS
21              
22             # Bio::Search::Hit::HitI objects should not be instantiated since this
23             # module defines a pure interface.
24              
25             # Given an object that implements the Bio::Search::Hit::HitI interface,
26             # you can do the following things with it:
27              
28             # Get a HitI object from a SearchIO stream:
29             use Bio::SearchIO;
30             my $searchio = Bio::SearchIO->new(-format => 'blast', -file => 'result.bls');
31             my $result = $searchio->next_result;
32             my $hit = $result->next_hit;
33              
34             $hit_name = $hit->name();
35              
36             $desc = $hit->description();
37              
38             $len = $hit->length();
39              
40             $alg = $hit->algorithm();
41              
42             $score = $hit->raw_score();
43              
44             $significance = $hit->significance();
45              
46             $rank = $hit->rank(); # the Nth hit for a specific query
47              
48             while( $hsp = $hit->next_hsp()) { ... } # process in iterator fashion
49              
50             for my $hsp ( $hit->hsps()) { ... } # process in list fashion
51              
52             =head1 DESCRIPTION
53              
54             Bio::Search::Hit::* objects are data structures that contain information
55             about specific hits obtained during a library search. Some information will
56             be algorithm-specific, but others will be generally defined.
57              
58             =head1 FEEDBACK
59              
60             =head2 Mailing Lists
61              
62             User feedback is an integral part of the evolution of this and other
63             Bioperl modules. Send your comments and suggestions preferably to one
64             of the Bioperl mailing lists. Your participation is much appreciated.
65              
66             bioperl-l@bioperl.org - General discussion
67             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
68              
69             =head2 Support
70              
71             Please direct usage questions or support issues to the mailing list:
72              
73             I<bioperl-l@bioperl.org>
74              
75             rather than to the module maintainer directly. Many experienced and
76             responsive experts will be able to look at the problem and quickly
77             address it. Please include a thorough description of the problem
78             with code and data examples if at all possible.
79              
80             =head2 Reporting Bugs
81              
82             Report bugs to the Bioperl bug tracking system to help us keep track
83             of the bugs and their resolution. Bug reports can be submitted via the
84             web:
85              
86             https://github.com/bioperl/bioperl-live/issues
87              
88             =head1 AUTHOR - Aaron Mackey, Steve Chervitz
89              
90             Email amackey@virginia.edu (original author)
91             Email sac@bioperl.org
92              
93             =head1 COPYRIGHT
94              
95             Copyright (c) 1999-2001 Aaron Mackey, Steve Chervitz. All Rights Reserved.
96              
97             =head1 DISCLAIMER
98              
99             This software is provided "as is" without warranty of any kind.
100              
101             =head1 APPENDIX
102              
103             The rest of the documentation details each of the object
104             methods. Internal methods are usually preceded with a _
105              
106             =cut
107              
108             # Let the code begin...
109              
110             package Bio::Search::Hit::HitI;
111              
112              
113 29     29   243 use strict;
  29         58  
  29         938  
114              
115 29     29   155 use base qw(Bio::Root::RootI);
  29         65  
  29         21949  
116              
117              
118             =head2 name
119              
120             Title : name
121             Usage : $hit_name = $hit->name();
122             Function: returns the name of the Hit sequence
123             Returns : a scalar string
124             Args : none
125              
126             The B<name> of a hit is unique within a Result or within an Iteration.
127              
128             =cut
129              
130             sub name {
131 0     0 1 0 my ($self,@args) = @_;
132 0         0 $self->throw_not_implemented;
133             }
134              
135             =head2 description
136              
137             Title : description
138             Usage : $desc = $hit->description();
139             Function: Retrieve the description for the hit
140             Returns : a scalar string
141             Args : none
142              
143             =cut
144              
145             sub description {
146 0     0 1 0 my ($self,@args) = @_;
147 0         0 $self->throw_not_implemented;
148             }
149              
150              
151             =head2 accession
152              
153             Title : accession
154             Usage : $acc = $hit->accession();
155             Function: Retrieve the accession (if available) for the hit
156             Returns : a scalar string (empty string if not set)
157             Args : none
158              
159             =cut
160              
161             sub accession {
162 0     0 1 0 my ($self,@args) = @_;
163 0         0 $self->throw_not_implemented;
164             }
165              
166             =head2 locus
167              
168             Title : locus
169             Usage : $locus = $hit->locus();
170             Function: Retrieve the locus (if available) for the hit
171             Returns : a scalar string (empty string if not set)
172             Args : none
173              
174             =cut
175              
176             sub locus {
177 0     0 1 0 my ($self,@args) = @_;
178 0         0 $self->throw_not_implemented;
179             }
180              
181             =head2 length
182              
183             Title : length
184             Usage : my $len = $hit->length
185             Function: Returns the length of the hit
186             Returns : integer
187             Args : none
188              
189             =cut
190              
191             sub length {
192 0     0 1 0 my ($self,@args) = @_;
193 0         0 $self->throw_not_implemented;
194             }
195              
196              
197             =head2 algorithm
198              
199             Title : algorithm
200             Usage : $alg = $hit->algorithm();
201             Function: Gets the algorithm specification that was used to obtain the hit
202             For BLAST, the algorithm denotes what type of sequence was aligned
203             against what (BLASTN: dna-dna, BLASTP prt-prt, BLASTX translated
204             dna-prt, TBLASTN prt-translated dna, TBLASTX translated
205             dna-translated dna).
206             Returns : a scalar string
207             Args : none
208              
209             =cut
210              
211             sub algorithm {
212 0     0 1 0 my ($self,@args) = @_;
213 0         0 $self->throw_not_implemented;
214             }
215              
216             =head2 raw_score
217              
218             Title : raw_score
219             Usage : $score = $hit->raw_score();
220             Function: Gets the "raw score" generated by the algorithm. What
221             this score is exactly will vary from algorithm to algorithm,
222             returning undef if unavailable.
223             Returns : a scalar value
224             Args : none
225              
226             =cut
227              
228             sub raw_score {
229 0     0 1 0 $_[0]->throw_not_implemented;
230             }
231              
232             =head2 score
233              
234             Equivalent to L</raw_score>
235              
236             =cut
237              
238 0     0 1 0 sub score { shift->raw_score(@_); }
239              
240             =head2 significance
241              
242             Title : significance
243             Usage : $significance = $hit->significance();
244             Function: Used to obtain the E or P value of a hit, i.e. the probability that
245             this particular hit was obtained purely by random chance. If
246             information is not available (nor calculable from other
247             information sources), return undef.
248             Returns : a scalar value or undef if unavailable
249             Args : none
250              
251             =cut
252              
253             sub significance {
254 0     0 1 0 $_[0]->throw_not_implemented;
255             }
256              
257             =head2 bits
258              
259             Usage : $hit_object->bits();
260             Purpose : Gets the bit score of the best HSP for the current hit.
261             Example : $bits = $hit_object->bits();
262             Returns : Integer or double for FASTA reports
263             Argument : n/a
264             Comments : For BLAST1, the non-bit score is listed in the summary line.
265              
266             See Also : L
267              
268             =cut
269              
270             #---------
271             sub bits {
272             #---------
273 0     0 1 0 $_[0]->throw_not_implemented();
274             }
275              
276             =head2 next_hsp
277              
278             Title : next_hsp
279             Usage : while( $hsp = $obj->next_hsp()) { ... }
280             Function : Returns the next available High Scoring Pair
281             Example :
282             Returns : L<Bio::Search::HSP::HSPI> object or null if finished
283             Args : none
284              
285             =cut
286              
287             sub next_hsp {
288 0     0 1 0 my ($self,@args) = @_;
289 0         0 $self->throw_not_implemented;
290             }
291              
292              
293             =head2 hsps
294              
295             Usage : $hit_object->hsps();
296             Purpose : Get a list containing all HSP objects.
297             : Get the number of HSPs for the current hit.
298             Example : @hsps = $hit_object->hsps();
299             : $num = $hit_object->hsps(); # alternatively, use num_hsps()
300             Returns : Array context : list of L<Bio::Search::HSP::HSPI> objects.
301             : Scalar context: integer (number of HSPs).
302             : (Equivalent to num_hsps()).
303             Argument : n/a. Relies on wantarray
304             Throws : Exception if the HSPs have not been collected.
305              
306             See Also : L</hsp>, L</num_hsps>
307              
308             =cut
309              
310             #---------
311             sub hsps {
312             #---------
313 0     0 1 0 my $self = shift;
314              
315 0         0 $self->throw_not_implemented();
316             }
317              
318              
319              
320             =head2 num_hsps
321              
322             Usage : $hit_object->num_hsps();
323             Purpose : Get the number of HSPs for the present Blast hit.
324             Example : $nhsps = $hit_object->num_hsps();
325             Returns : Integer
326             Argument : n/a
327             Throws : Exception if the HSPs have not been collected.
328              
329             See Also : L
330              
331             =cut
332              
333             #-------------
334             sub num_hsps {
335             #-------------
336 0     0 1 0 shift->throw_not_implemented();
337             }
338              
339              
340             =head2 seq_inds
341              
342             Usage : $hit->seq_inds( seq_type, class, collapse );
343             Purpose : Get a list of residue positions (indices) across all HSPs
344             : for identical or conserved residues in the query or sbjct sequence.
345             Example : @s_ind = $hit->seq_inds('query', 'identical');
346             : @h_ind = $hit->seq_inds('hit', 'conserved');
347             : @h_ind = $hit->seq_inds('hit', 'conserved', 1);
348             Returns : Array of integers
349             : May include ranges if collapse is non-zero.
350             Argument : [0] seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
351             : ('sbjct' is synonymous with 'hit')
352             : [1] class = 'identical' or 'conserved' (default = 'identical')
353             : (can be shortened to 'id' or 'cons')
354             : (actually, anything not 'id' will evaluate to 'conserved').
355             : [2] collapse = boolean, if non-zero, consecutive positions are merged
356             : using a range notation, e.g., "1 2 3 4 5 7 9 10 11"
357             : collapses to "1-5 7 9-11". This is useful for
358             : consolidating long lists. Default = no collapse.
359             Throws : n/a.
360              
361             See Also : L
362              
363             =cut
364              
365             #-------------
366             sub seq_inds {
367             #-------------
368 0     0 1 0 my ($self, $seqType, $class, $collapse) = @_;
369              
370 0   0     0 $seqType ||= 'query';
371 0   0     0 $class ||= 'identical';
372 0   0     0 $collapse ||= 0;
373              
374 0 0       0 $seqType = 'sbjct' if $seqType eq 'hit';
375              
376 0         0 my (@inds, $hsp);
377 0         0 foreach $hsp ($self->hsps) {
378             # This will merge data for all HSPs together.
379 0         0 push @inds, $hsp->seq_inds($seqType, $class);
380             }
381            
382             # Need to remove duplicates and sort the merged positions.
383 0 0       0 if(@inds) {
384 0         0 my %tmp = map { $_, 1 } @inds;
  0         0  
385 0         0 @inds = sort {$a <=> $b} keys %tmp;
  0         0  
386             }
387              
388 0 0       0 $collapse ? &Bio::Search::BlastUtils::collapse_nums(@inds) : @inds;
389             }
390              
391             =head2 rewind
392              
393             Title : rewind
394             Usage : $hit->rewind;
395             Function: Allow one to reset the HSP iterator to the beginning
396             if possible
397             Returns : none
398             Args : none
399              
400             =cut
401              
402             sub rewind{
403 0     0 1 0 my ($self) = @_;
404 0         0 $self->throw_not_implemented();
405             }
406              
407              
408             =head2 overlap
409              
410             Usage : $hit_object->overlap( [integer] );
411             Purpose : Gets/Sets the allowable amount of overlap between different HSP sequences.
412             Example : $hit_object->overlap(5);
413             : $overlap = $hit_object->overlap;
414             Returns : Integer.
415             Argument : integer.
416             Throws : n/a
417             Status : Experimental
418             Comments : Any two HSPs whose sequences overlap by less than or equal
419             : to the overlap() number of residues will be considered separate HSPs
420             : and will not get tiled by L.
421              
422             See Also : L, L
423              
424             =cut
425              
426             #-------------
427 0     0 1 0 sub overlap { shift->throw_not_implemented }
428              
429              
430             =head2 n
431              
432             Usage : $hit_object->n();
433             Purpose : Gets the N number for the current Blast hit.
434             : This is the number of HSPs in the set which was ascribed
435             : the lowest P-value (listed on the description line).
436             : This number is not the same as the total number of HSPs.
437             : To get the total number of HSPs, use num_hsps().
438             Example : $n = $hit_object->n();
439             Returns : Integer
440             Argument : n/a
441             Throws : Exception if HSPs have not been set (BLAST2 reports).
442             Comments : Note that the N parameter is not reported in gapped BLAST2.
443             : Calling n() on such reports will result in a call to num_hsps().
444             : The num_hsps() method will count the actual number of
445             : HSPs in the alignment listing, which may exceed N in
446             : some cases.
447              
448             See Also : L</num_hsps>
449              
450             =cut
451              
452             #-----
453 0     0 1 0 sub n { shift->throw_not_implemented }
454              
455             =head2 p
456              
457             Usage : $hit_object->p( [format] );
458             Purpose : Get the P-value for the best HSP of the given BLAST hit.
459             : (Note that P-values are not provided with NCBI Blast2 reports).
460             Example : $p = $sbjct->p;
461             : $p = $sbjct->p('exp'); # get exponent only.
462             : ($num, $exp) = $sbjct->p('parts'); # split sci notation into parts
463             Returns : Float or scientific notation number (the raw P-value, DEFAULT).
464             : Integer if format == 'exp' (the magnitude of the base 10 exponent).
465             : 2-element list (float, int) if format == 'parts' and P-value
466             : is in scientific notation (See Comments).
467             Argument : format: string of 'raw' | 'exp' | 'parts'
468             : 'raw' returns value given in report. Default. (1.2e-34)
469             : 'exp' returns exponent value only (34)
470             : 'parts' returns the decimal and exponent as a
471             : 2-element list (1.2, -34) (See Comments).
472             Throws : Warns if no P-value is defined. Uses expect instead.
473             Comments : Using the 'parts' argument is not recommended since it will not
474             : work as expected if the P-value is not in scientific notation.
475             : That is, floats are not converted into sci notation before
476             : splitting into parts.
477              
478             See Also : L, L, L
479              
480             =cut
481              
482             #--------
483 0     0 1 0 sub p { shift->throw_not_implemented() }
484              
485             =head2 hsp
486              
487             Usage : $hit_object->hsp( [string] );
488             Purpose : Get a single HSPI object for the present HitI object.
489             Example : $hspObj = $hit_object->hsp; # same as 'best'
490             : $hspObj = $hit_object->hsp('best');
491             : $hspObj = $hit_object->hsp('worst');
492             Returns : Object reference for a L<Bio::Search::HSP::HSPI> object.
493             Argument : String (or no argument).
494             : No argument (default) = highest scoring HSP (same as 'best').
495             : 'best' or 'first' = highest scoring HSP.
496             : 'worst' or 'last' = lowest scoring HSP.
497             Throws : Exception if the HSPs have not been collected.
498             : Exception if an unrecognized argument is used.
499              
500             See Also : L, L()
501              
502             =cut
503              
504             #----------
505 0     0 1 0 sub hsp { shift->throw_not_implemented }
506              
507             =head2 logical_length
508              
509             Usage : $hit_object->logical_length( [seq_type] );
510             : (mostly intended for internal use).
511             Purpose : Get the logical length of the hit sequence.
512             : If the Blast is a TBLASTN or TBLASTX, the returned length
513             : is the length of the would-be amino acid sequence (length/3).
514             : For all other BLAST flavors, this function is the same as length().
515             Example : $len = $hit_object->logical_length();
516             Returns : Integer
517             Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
518             ('sbjct' is synonymous with 'hit')
519             Throws : n/a
520             Comments : This is important for functions like frac_aligned_query()
521             : which need to operate in amino acid coordinate space when dealing
522             : with [T]BLAST[NX] type reports.
523              
524             See Also : L, L, L
525              
526             =cut
527              
528             #--------------------
529 0     0 1 0 sub logical_length { shift->throw_not_implemented() }
530              
531              
532             =head2 rank
533              
534             Title : rank
535             Usage : $obj->rank($newval)
536             Function: Get/Set the rank of this Hit in the Query search list
537             i.e. this is the Nth hit for a specific query
538             Returns : value of rank
539             Args : newvalue (optional)
540              
541              
542             =cut
543              
544             sub rank{
545 0     0 1 0 my ($self,$value) = @_;
546 0         0 $self->throw_not_implemented();
547             }
548              
549             =head2 each_accession_number
550              
551             Title : each_accession_number
552             Usage : $obj->each_accession_number
553             Function: Get each accession number listed in the description of the hit.
554             If there are no alternatives, then only the primary accession will
555             be given
556             Returns : list of all accession numbers in the description
557             Args : none
558              
559              
560             =cut
561              
562             sub each_accession_number{
563 0     0 1 0 my ($self,$value) = @_;
564 0         0 $self->throw_not_implemented();
565             }
566              
567              
568             =head2 tiled_hsps
569              
570             Usage : $hit_object->tiled_hsps( [integer] );
571             Purpose : Gets/Sets an indicator for whether or not the HSPs in this Hit
572             : have been tiled.
573             : Methods that rely on HSPs being tiled should check this
574             : and then call SearchUtils::tile_hsps() if not.
575             Example : $hit_object->tiled_hsps(1);
576             : if( $hit_object->tiled_hsps ) { # do something }
577             Returns : Boolean (1 or 0)
578             Argument : integer (optional)
579             Throws : n/a
580              
581             =cut
582              
583 0     0 1 0 sub tiled_hsps { shift->throw_not_implemented }
584              
585              
586             =head2 strand
587              
588             Usage : $sbjct->strand( [seq_type] );
589             Purpose : Gets the strand(s) for the query, sbjct, or both sequences
590             : in the best HSP of the BlastHit object after HSP tiling.
591             : Only valid for BLASTN, TBLASTX, BLASTX-query, TBLASTN-hit.
592             Example : $qstrand = $sbjct->strand('query');
593             : $sstrand = $sbjct->strand('hit');
594             : ($qstrand, $sstrand) = $sbjct->strand();
595             Returns : scalar context: integer '1', '-1', or '0'
596             : array context without args: list of two strings (queryStrand, sbjctStrand)
597             : Array context can be "induced" by providing an argument of 'list' or 'array'.
598             Argument : In scalar context: seq_type = 'query' or 'hit' or 'sbjct' (default = 'query')
599             ('sbjct' is synonymous with 'hit')
600             Throws : n/a
601             Comments : This method requires that all HSPs be tiled. If they have not
602             : already been tiled, they will be tiled first automatically.
603             : If you don't want the tiled data, iterate through each HSP
604             : calling strand() on each (use hsps() to get all HSPs).
605             :
606             : Formerly (prior to 10/21/02), this method would return the
607             : string "-1/1" for hits with HSPs on both strands.
608             : However, now that strand and frame is properly being accounted
609             : for during HSP tiling, it makes more sense for strand()
610             : to return the strand data for the best HSP after tiling.
611             :
612             : If you really want to know about hits on opposite strands,
613             : you should be iterating through the HSPs using methods on the
614             : HSP objects.
615             :
616             : A possible use case where knowing whether a hit has HSPs
617             : on both strands would be when filtering via SearchIO for hits with
618             : this property. However, in this case it would be better to have a
619             : dedicated method such as $hit->hsps_on_both_strands(). Similarly
620             : for frame. This could be provided if there is interest.
621              
622             See Also : L()
623              
624             =cut
625              
626             #---------'
627 0     0 1 0 sub strand { shift->throw_not_implemented }
628              
629              
630             =head2 frame
631              
632             Usage : $hit_object->frame();
633             Purpose : Gets the reading frame for the best HSP after HSP tiling.
634             : This is only valid for BLASTX and TBLASTN/X type reports.
635             Example : $frame = $hit_object->frame();
636             Returns : Integer (-2 .. +2)
637             Argument : n/a
638             Throws : Exception if HSPs have not been set.
639             Comments : This method requires that all HSPs be tiled. If they have not
640             : already been tiled, they will be tiled first automatically.
641             : If you don't want the tiled data, iterate through each HSP
642             : calling frame() on each (use hsps() to get all HSPs).
643              
644             See Also : L
645              
646             =cut
647              
648             #---------'
649 0     0 1 0 sub frame { shift->throw_not_implemented }
650              
651              
652             =head2 matches
653              
654             Usage : $hit_object->matches( [class] );
655             Purpose : Get the total number of identical or conserved matches
656             : (or both) across all HSPs.
657             : (Note: 'conservative' matches are indicated as 'positives'
658             : in BLAST reports.)
659             Example : ($id,$cons) = $hit_object->matches(); # no argument
660             : $id = $hit_object->matches('id');
661             : $cons = $hit_object->matches('cons');
662             Returns : Integer or a 2-element array of integers
663             Argument : class = 'id' | 'cons' OR none.
664             : If no argument is provided, both identical and conservative
665             : numbers are returned in a two element list.
666             : (Other terms can be used to refer to the conservative
667             : matches, e.g., 'positive'. All that is checked is whether or
668             : not the supplied string starts with 'id'. If not, the
669             : conservative matches are returned.)
670             Throws : Exception if the requested data cannot be obtained.
671             Comments : This method requires that all HSPs be tiled. If there is more than one
672             : HSP and they have not already been tiled, they will be tiled first automatically.
673             :
674             : If you need data for each HSP, use hsps() and then iterate
675             : through the HSP objects.
676             : Does not rely on wantarray to return a list. Only checks for
677             : the presence of an argument (no arg = return list).
678              
679             See Also : L, L
680              
681             =cut
682              
683 0     0 1 0 sub matches { shift->throw_not_implemented }
684              
685              
686             # aliasing for Steve's method names
687 0     0 0 0 sub hit_description { shift->description(@_) }
688             # aliasing for Steve's method names
689 21     21 0 49 sub hit_length { shift->length(@_) }
690              
691              
692             # sort method for HSPs
693              
694             =head2 sort_hsps
695              
696             Title : sort_hsps
697             Usage : $hit->sort_hsps(\&sort_function)
698             Function : Sorts the available HSP objects by a user-supplied function. Defaults to sort
699             by descending score.
700             Returns : n/a
701             Args : A coderef for the sort function. See the documentation on the Perl sort()
702             function for guidelines on writing sort functions.
703             Note : To access the special variables $a and $b used by the Perl sort() function
704             the user function must access Bio::Search::Hit::HitI namespace.
705             For example, use :
706             $hit->sort_hsps( sub{$Bio::Search::Hit::HitI::a->length <=>
707             $Bio::Search::Hit::HitI::b->length});
708             NOT $hit->sort_hsps($a->length <=> $b->length);
709              
710             =cut
711              
712 0     0 1   sub sort_hsps {shift->throw_not_implemented }
713              
714             =head2 _default_sort_hsps
715              
716             Title : _default_sort_hsps
717             Usage : Do not call directly.
718             Function : Sort hsps in ascending order by evalue
719             Args : None
720             Returns: 1 on success
721             Note : Used by $hit->sort_hsps()
722              
723             =cut
724              
725             sub _default_sort_hsps {
726 0     0     $Bio::Search::Hit::HitI::a->evalue <=>
727             $Bio::Search::Hit::HitI::a->evalue;
728             }
729              
730             1;
731              
732              
733              
734