File Coverage

Bio/SearchIO/IteratedSearchResultEventBuilder.pm
Criterion Covered Total %
statement 89 92 96.7
branch 28 28 100.0
condition 9 23 39.1
subroutine 10 10 100.0
pod 5 5 100.0
total 141 158 89.2


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------
2             #
3             # BioPerl module for Bio::SearchIO::IteratedSearchResultEventBuilder
4             #
5             # Please direct questions and support issues to
6             #
7             # Cared for by Steve Chervitz and Jason Stajich
8             #
9             # Copyright Steve Chervitz
10             #
11             # You may distribute this module under the same terms as perl itself
12             #------------------------------------------------------------------
13              
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::SearchIO::IteratedSearchResultEventBuilder - Event Handler for
19             SearchIO events.
20              
21             =head1 SYNOPSIS
22              
23             # Do not use this object directly, this object is part of the SearchIO
24             # event based parsing system.
25              
26             =head1 DESCRIPTION
27              
28             This object handles Search Events generated by the SearchIO classes
29             and build appropriate Bio::Search::* objects from them.
30              
31             =head1 FEEDBACK
32              
33             =head2 Mailing Lists
34              
35             User feedback is an integral part of the evolution of this and other
36             Bioperl modules. Send your comments and suggestions preferably to
37             the Bioperl mailing list. Your participation is much appreciated.
38              
39             bioperl-l@bioperl.org - General discussion
40             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41              
42             =head2 Support
43              
44             Please direct usage questions or support issues to the mailing list:
45              
46             I
47              
48             rather than to the module maintainer directly. Many experienced and
49             reponsive experts will be able look at the problem and quickly
50             address it. Please include a thorough description of the problem
51             with code and data examples if at all possible.
52              
53             =head2 Reporting Bugs
54              
55             Report bugs to the Bioperl bug tracking system to help us keep track
56             of the bugs and their resolution. Bug reports can be submitted via the
57             web:
58              
59             https://github.com/bioperl/bioperl-live/issues
60              
61             =head1 AUTHOR - Steve Chervitz
62              
63             Email sac-at-bioperl.org
64              
65             =head1 CONTRIBUTORS
66              
67             Parts of code based on SearchResultEventBuilder by Jason Stajich
68             jason@bioperl.org
69              
70             Sendu Bala, bix@sendu.me.uk
71              
72             =head1 APPENDIX
73              
74             The rest of the documentation details each of the object methods.
75             Internal methods are usually preceded with a _
76              
77             =cut
78              
79              
80             # Let the code begin...
81              
82              
83             package Bio::SearchIO::IteratedSearchResultEventBuilder;
84              
85 12     12   44 use strict;
  12         11  
  12         286  
86              
87 12     12   39 use Bio::Factory::ObjectFactory;
  12         10  
  12         277  
88              
89 12     12   32 use base qw(Bio::SearchIO::SearchResultEventBuilder);
  12         10  
  12         8423  
90              
91             =head2 new
92              
93             Title : new
94             Usage : my $obj = Bio::SearchIO::IteratedSearchResultEventBuilder->new();
95             Function: Builds a new Bio::SearchIO::IteratedSearchResultEventBuilder object
96             Returns : Bio::SearchIO::IteratedSearchResultEventBuilder
97             Args : -hsp_factory => Bio::Factory::ObjectFactoryI
98             -hit_factory => Bio::Factory::ObjectFactoryI
99             -result_factory => Bio::Factory::ObjectFactoryI
100             -iteration_factory => Bio::Factory::ObjectFactoryI
101             -inclusion_threshold => e-value threshold for inclusion in the
102             PSI-BLAST score matrix model (blastpgp)
103             -signif => float or scientific notation number to be used
104             as a P- or Expect value cutoff
105             -score => integer or scientific notation number to be used
106             as a blast score value cutoff
107             -bits => integer or scientific notation number to be used
108             as a bit score value cutoff
109             -hit_filter => reference to a function to be used for
110             filtering hits based on arbitrary criteria.
111              
112             See L for more information
113              
114             =cut
115              
116             sub new {
117 92     92 1 239 my ($class,@args) = @_;
118 92         329 my $self = $class->SUPER::new(@args);
119 92         498 my ($resultF, $iterationF, $hitF, $hspF) =
120             $self->_rearrange([qw(RESULT_FACTORY
121             ITERATION_FACTORY
122             HIT_FACTORY
123             HSP_FACTORY)],@args);
124 92         357 $self->_init_parse_params(@args);
125              
126             # Note that we need to override the setting of result and factories here
127             # so that we can set different default factories than are set by the super class.
128 92   33     573 $self->register_factory('result', $resultF ||
129             Bio::Factory::ObjectFactory->new(
130             -type => 'Bio::Search::Result::BlastResult',
131             -interface => 'Bio::Search::Result::ResultI'));
132              
133 92   33     179 $self->register_factory('hit', $hitF ||
134             Bio::Factory::ObjectFactory->new(
135             -type => 'Bio::Search::Hit::BlastHit',
136             -interface => 'Bio::Search::Hit::HitI'));
137              
138 92   33     172 $self->register_factory('hsp', $hspF ||
139             Bio::Factory::ObjectFactory->new(
140             -type => 'Bio::Search::HSP::GenericHSP',
141             -interface => 'Bio::Search::HSP::HSPI'));
142              
143             # TODO: Change this to BlastIteration (maybe)
144 92   33     169 $self->register_factory('iteration', $iterationF ||
145             Bio::Factory::ObjectFactory->new(
146             -type => 'Bio::Search::Iteration::GenericIteration',
147             -interface => 'Bio::Search::Iteration::IterationI'));
148              
149 92         273 return $self;
150             }
151              
152             =head2 will_handle
153              
154             Title : will_handle
155             Usage : if( $handler->will_handle($event_type) ) { ... }
156             Function: Tests if this event builder knows how to process a specific event
157             Returns : boolean
158             Args : event type name
159              
160             =cut
161              
162             sub will_handle{
163 338     338 1 351 my ($self,$type) = @_;
164             # these are the events we recognize
165 338   33     2503 return ( $type eq 'hsp' || $type eq 'hit' || $type eq 'result'
166             || $type eq 'iteration' || $type eq 'newhits' || $type eq 'oldhits' );
167             }
168              
169             =head2 SAX methods
170              
171             =cut
172              
173             =head2 start_result
174              
175             Title : start_result
176             Usage : $handler->start_result($resulttype)
177             Function: Begins a result event cycle
178             Returns : none
179             Args : Type of Report
180              
181             =cut
182              
183             sub start_result {
184 96     96 1 131 my $self = shift;
185             #print STDERR "ISREB: start_result()\n";
186 96         404 $self->SUPER::start_result(@_);
187 96         279 $self->{'_iterations'} = [];
188 96         178 $self->{'_iteration_count'} = 0;
189 96         150 $self->{'_old_hit_names'} = undef;
190 96         175 $self->{'_hit_names_below'} = undef;
191 96         348 return;
192             }
193              
194             =head2 start_iteration
195              
196             Title : start_iteration
197             Usage : $handler->start_iteration()
198             Function: Starts an Iteration event cycle
199             Returns : none
200             Args : type of event and associated hashref
201              
202             =cut
203              
204             sub start_iteration {
205 93     93 1 193 my ($self,$type) = @_;
206              
207             #print STDERR "ISREB: start_iteration()\n";
208 93         131 $self->{'_iteration_count'}++;
209              
210             # Reset arrays for the various classes of hits.
211             # $self->{'_newhits_unclassified'} = [];
212 93         211 $self->{'_newhits_below'} = [];
213 93         731 $self->{'_newhits_not_below'} = [];
214 93         234 $self->{'_oldhits_below'} = [];
215 93         176 $self->{'_oldhits_newly_below'} = [];
216 93         272 $self->{'_oldhits_not_below'} = [];
217 93         147 $self->{'_hitcount'} = 0;
218 93         191 return;
219             }
220              
221              
222             =head2 end_iteration
223              
224             Title : end_iteration
225             Usage : $handler->end_iteration()
226             Function: Ends an Iteration event cycle
227             Returns : Bio::Search::Iteration object
228             Args : type of event and associated hashref
229              
230             =cut
231              
232             sub end_iteration {
233 93     93 1 179 my ($self,$type,$data) = @_;
234              
235             # print STDERR "ISREB: end_iteration()\n";
236              
237 0         0 my %args = map { my $v = $data->{$_}; s/ITERATION//; ($_ => $v); }
  0         0  
  0         0  
238 93         180 grep { /^ITERATION/ } keys %{$data};
  2842         2398  
  93         449  
239              
240 93         309 $args{'-number'} = $self->{'_iteration_count'};
241 93         222 $args{'-oldhits_below'} = $self->{'_oldhits_below'};
242 93         192 $args{'-oldhits_newly_below'} = $self->{'_oldhits_newly_below'};
243 93         168 $args{'-oldhits_not_below'} = $self->{'_oldhits_not_below'};
244 93         194 $args{'-newhits_below'} = $self->{'_newhits_below'};
245 93         187 $args{'-newhits_not_below'} = $self->{'_newhits_not_below'};
246 93         262 $args{'-hit_factory'} = $self->factory('hit');
247              
248 93         258 my $it = $self->factory('iteration')->create_object(%args);
249 93         163 push @{$self->{'_iterations'}}, $it;
  93         218  
250 93         271 return $it;
251             }
252              
253             # Title : _add_hit (private function for internal use only)
254             # Purpose : Applies hit filtering and calls _store_hit if it passes filtering.
255             # Argument: Bio::Search::Hit::HitI object
256              
257             sub _add_hit {
258 2446     2446   2281 my ($self, $hit) = @_;
259              
260 2446         4332 my $hit_name = uc($hit->{-name});
261 2446         2716 my $hit_signif = $hit->{-significance};
262 2446         2272 my $ithresh = $self->{'_inclusion_threshold'};
263              
264             # Test significance using custom function (if supplied)
265 2446         1957 my $add_hit = 1;
266              
267 2446         2199 my $hit_filter = $self->{'_hit_filter'};
268              
269 2446 100       3053 if($hit_filter) {
270             # since &hit_filter is out of our control and would expect a HitI object,
271             # we're forced to make one for it
272 4         10 $hit = $self->factory('hit')->create_object(%{$hit});
  4         20  
273 4 100       15 $add_hit = 0 unless &$hit_filter($hit);
274             }
275             else {
276 2442 100       4009 if($self->{'_confirm_significance'}) {
277 4 100       23 $add_hit = 0 unless $hit_signif <= $self->{'_max_significance'};
278             }
279 2442 100       3949 if($self->{'_confirm_score'}) {
280 4   33     26 my $hit_score = $hit->{-score} || $hit->{-hsps}->[0]->{-score};
281 4 100       15 $add_hit = 0 unless $hit_score >= $self->{'_min_score'};
282             }
283 2442 100       3754 if($self->{'_confirm_bits'}) {
284 4   33     59 my $hit_bits = $hit->{-bits} || $hit->{-hsps}->[0]->{-bits};
285 4 100       17 $add_hit = 0 unless $hit_bits >= $self->{'_min_bits'};
286             }
287             }
288              
289 2446 100       5557 $add_hit && $self->_store_hit($hit, $hit_name, $hit_signif);
290             # Building hit lookup hashes for determining if the hit is old/new and
291             # above/below threshold.
292 2446         4105 $self->{'_old_hit_names'}->{$hit_name}++;
293 2446 100       8144 $self->{'_hit_names_below'}->{$hit_name}++ if $hit_signif <= $ithresh;
294             }
295              
296             # Title : _store_hit (private function for internal use only)
297             # Purpose : Collects hit objects into defined sets that are useful for
298             # analyzing PSI-blast results.
299             # These are ultimately added to the iteration object in end_iteration().
300             #
301             # Strategy:
302             # Primary split = old vs. new
303             # Secondary split = below vs. above threshold
304             # 1. Has this hit occurred in a previous iteration?
305             # 1.1. If yes, was it below threshold?
306             # 1.1.1. If yes, ---> [oldhits_below]
307             # 1.1.2. If no, is it now below threshold?
308             # 1.1.2.1. If yes, ---> [oldhits_newly_below]
309             # 1.1.2.2. If no, ---> [oldhits_not_below]
310             # 1.2. If no, is it below threshold?
311             # 1.2.1. If yes, ---> [newhits_below]
312             # 1.2.2. If no, ---> [newhits_not_below]
313             # 1.2.3. If don't know (no inclusion threshold data), ---> [newhits_unclassified]
314             # Note: As long as there's a default inclusion threshold,
315             # there won't be an unclassified set.
316             #
317             # For the first iteration, it might be nice to detect non-PSI blast reports
318             # and put the hits in the unclassified set.
319             # However, it shouldn't matter where the hits get put for the first iteration
320             # for non-PSI blast reports since they'll get flattened out in the
321             # result and iteration search objects.
322              
323             sub _store_hit {
324 2438     2438   2730 my ($self, $hit, $hit_name, $hit_signif) = @_;
325              
326 2438         2061 my $ithresh = $self->{'_inclusion_threshold'};
327              
328             # This is the assumption leading to Bug 1986. The assumption here is that
329             # the hit name is unique (and thus new), therefore any subsequent encounters
330             # with a hit containing the same name are filed as old hits. This isn't
331             # always true (see the bug report for a few examples). Adding an explicit
332             # check for the presence of iterations, adding to new hits otherwise.
333              
334 2438 100 100     6174 if (exists $self->{'_old_hit_names'}->{$hit_name}
335 121         232 && scalar @{$self->{_iterations}}) {
336 117 100       267 if (exists $self->{'_hit_names_below'}->{$hit_name}) {
    100          
337 109         77 push @{$self->{'_oldhits_below'}}, $hit;
  109         200  
338             } elsif ($hit_signif <= $ithresh) {
339 3         5 push @{$self->{'_oldhits_newly_below'}}, $hit;
  3         7  
340             } else {
341 5         8 push @{$self->{'_oldhits_not_below'}}, $hit;
  5         17  
342             }
343             } else {
344 2321 100       6004 if ($hit_signif <= $ithresh) {
345 1773         1407 push @{$self->{'_newhits_below'}}, $hit;
  1773         2775  
346             } else {
347 548         434 push @{$self->{'_newhits_not_below'}}, $hit;
  548         918  
348             }
349             }
350 2438         3135 $self->{'_hitcount'}++;
351             }
352              
353             1;