File Coverage

blib/lib/Bio/FastParsers/Blast/Table.pm
Criterion Covered Total %
statement 52 53 98.1
branch 15 16 93.7
condition 5 6 83.3
subroutine 11 11 100.0
pod 3 3 100.0
total 86 89 96.6


line stmt bran cond sub pod time code
1             # ABSTRACT: Front-end class for tabular BLAST parser
2             $Bio::FastParsers::Blast::Table::VERSION = '0.221230';
3             use Moose;
4 7     7   3228 use namespace::autoclean;
  7         2955087  
  7         43  
5 7     7   49727  
  7         50480  
  7         29  
6             use autodie;
7 7     7   494  
  7         16  
  7         61  
8             use List::AllUtils qw(mesh);
9 7     7   34413  
  7         27726  
  7         1085  
10             extends 'Bio::FastParsers::Base';
11              
12             use Bio::FastParsers::Constants qw(:files);
13 7     7   3592 use aliased 'Bio::FastParsers::Blast::Table::Hsp';
  7         18  
  7         1061  
14 7     7   3292  
  7         4438  
  7         37  
15             # TODO: recreate Table classes and internal classes through Templating
16             # TODO: check API consistency with Hmmer::Table and DomTable through synonyms
17             # TODO: document Hsp/Hit methods
18              
19             # public attributes (inherited)
20              
21              
22             # private attributes
23              
24             has '_line_iterator' => (
25             traits => ['Code'],
26             is => 'ro',
27             isa => 'CodeRef',
28             init_arg => undef,
29             lazy => 1,
30             builder => '_build_line_iterator',
31             handles => {
32             _next_line => 'execute',
33             },
34             );
35              
36             has '_last_' . $_ => (
37             is => 'ro',
38             isa => 'Str',
39             init_arg => undef,
40             default => q{},
41             writer => '_set_last_' . $_,
42             ) for qw(query hit);
43              
44              
45             ## no critic (ProhibitUnusedPrivateSubroutines)
46              
47             my $self = shift;
48              
49 19     19   27 open my $fh, '<', $self->file; # autodie
50             return sub { <$fh> }; # return closure
51 19         444 }
52 19     182   5923  
  182         1087  
53             ## use critic
54              
55             my @attrs = qw(
56             query_id hit_id
57             percent_identity hsp_length mismatches gaps
58             query_from query_to
59             hit_from hit_to
60             evalue bit_score
61             query_strand
62             hit_strand
63             query_start query_end
64             hit_start hit_end
65             ); # DID try to use MOP to get HSP attrs but order was not preserved
66              
67              
68             my $self = shift;
69              
70             # optional args for next_hit/next_query mode
71 89     89 1 317 my $curr_query = shift;
72             my $curr_hit = shift;
73              
74 89         126 LINE:
75 89         120 while (my $line = $self->_next_line) {
76              
77             # skip header/comments and empty lines
78 89         2921 chomp $line;
79             next LINE if $line =~ $COMMENT_LINE
80             || $line =~ $EMPTY_LINE;
81 182         309  
82 182 100 66     3812 # process HSP line
83             my @fields = ( split(/\t/xms, $line), +1, +1 );
84              
85             # Fields for m8/m9 (now 6/7) format:
86 92         431 # 0. query id
87             # 1. subject id
88             # 2. % identity
89             # 3. alignment length
90             # 4. mismatches
91             # 5. gap opens
92             # 6. q. start => query_from
93             # 7. q. end => query_to
94             # 8. s. start => hit_from
95             # 9. s. end => hit_to
96             # 10. evalue
97             # 11. bit score
98             # [12.] query_strand
99             # [13.] hit_strand
100             # [14.] query_start
101             # [15.] query_end
102             # [16.] hit_start
103             # [17.] hit_end
104              
105             # optionally skip current line in next_hit/next_query mode
106             if (defined $curr_query) {
107             if (defined $curr_hit) {
108             next LINE if $fields[0] eq $curr_query
109 92 100       191 && $fields[1] eq $curr_hit;
110 41 100       77 }
111 34 100 100     210 else {
112             next LINE if $fields[0] eq $curr_query;
113             }
114             }
115 7 50       24  
116             # coerce numeric fields to numbers...
117             # ... and handle missing bitscores and evalues from USEARCH reports
118             @fields[2..9] = map { 0 + $_ } @fields[2..9];
119             @fields[10..11] = map { $_ ne '*' ? 0 + $_ : undef } @fields[10..11];
120              
121 89         196 # add default strands and coordinates
  712         1285  
122 89 100       162 push @fields, @fields[6..9];
  178         452  
123              
124             # fix query strand and coordinates based on query orientation
125 89         183 if ($fields[14] > $fields[15]) {
126             @fields[14,15] = @fields[15,14];
127             $fields[12] = -1;
128 89 100       160 }
129 8         18  
130 8         14 # fix hit strand and coordinates based on hit orientation
131             if ($fields[16] > $fields[17]) {
132             @fields[16,17] = @fields[17,16];
133             $fields[13] = -1;
134 89 100       152 }
135 11         23  
136 11         17 # build HSP object
137             my $hsp = Hsp->new( { mesh @attrs, @fields } );
138              
139             # update last query and last hit
140 89         3944 # Note: this allows mixing calls to next_hsp / next_hit / next_query
141             $self->_set_last_query($hsp->query_id);
142             $self->_set_last_hit( $hsp->hit_id );
143              
144 89         3034 # return HSP object
145 89         2273 return $hsp;
146             }
147              
148 89         403 return; # no more line to read
149             }
150              
151 0         0  
152              
153             my $self = shift;
154             return $self->next_hsp( $self->_last_query, $self->_last_hit );
155             }
156              
157 31     31 1 186  
158 31         831  
159             my $self = shift;
160             return $self->next_hsp( $self->_last_query );
161             }
162              
163              
164 7     7 1 43 __PACKAGE__->meta->make_immutable;
165 7         190 1;
166              
167              
168             =pod
169              
170             =head1 NAME
171              
172             Bio::FastParsers::Blast::Table - Front-end class for tabular BLAST parser
173              
174             =head1 VERSION
175              
176             version 0.221230
177              
178             =head1 SYNOPSIS
179              
180             use aliased 'Bio::FastParsers::Blast::Table';
181              
182             # open and parse BLAST report in tabular format
183             my $infile = 'test/blastp.m9';
184             my $report = Table->new( file => $infile );
185              
186             # loop through hsps
187             while (my $hsp = $report->next_hsp) {
188             my ($hit_id, $evalue) = ($hsp->hit_id, $hsp->evalue);
189             # ...
190             }
191              
192             # ...
193              
194             my $infile = 'test/multiquery-blastp.m9';
195             my $report = Table->new( file => $infile );
196              
197             # loop through first hits for each query
198             while (my $first_hit = $report->next_query) {
199             my ($hit_id, $evalue) = ($hsp->hit_id, $hsp->evalue);
200             # ...
201             }
202              
203             =head1 DESCRIPTION
204              
205             This module implements a parser for the standard tabular output format of the
206             BLAST program (e.g., C<-outfmt 7>). It provides methods for iterating over
207             queries, hits and HSPs (e.g., C<next_hsp>). Individual HSPs can then be
208             queried using methods described in L<Bio::FastParsers::Blast::Table::Hsp>.
209              
210             =head1 ATTRIBUTES
211              
212             =head2 file
213              
214             Path to BLAST report file in tabular format (m8/m9 or now 6/7) to be parsed
215              
216             =head1 METHODS
217              
218             =head2 next_hsp
219              
220             Shifts the first HSP of the report off and returns it, shortening the report
221             by 1 and moving everything down. If there are no more HSPs in the report,
222             returns C<undef>.
223              
224             # $report is a Bio::FastParsers::Blast::Table
225             while (my $hsp = $report->next_hsp) {
226             # process $hsp
227             # ...
228             }
229              
230             This method does not accept any arguments.
231              
232             =head2 next_hit
233              
234             Directly returns the first HSP of the next hit, skipping any remaining HSPs
235             for the current hit in the process. If there are no more hits in the report,
236             returns C<undef>. Useful for processing only the first HSP of each hit.
237              
238             # $report is a Bio::FastParsers::Blast::Table
239             while (my $first_hsp = $report->next_hit) {
240             # process $first_hsp
241             # ...
242             }
243              
244             This method does not accept any arguments.
245              
246             =head2 next_query
247              
248             Directly returns the first HSP of the next query, skipping any remaining
249             HSPs for the current query in the process. If there are no more queries in
250             the report, returns C<undef>. Useful for processing only the first hit of
251             each query.
252              
253             # $report is a Bio::FastParsers::Blast::Table
254             while (my $first_hit = $report->next_query) {
255             # process $first_hit
256             # ...
257             }
258              
259             This method does not accept any arguments.
260              
261             =head1 AUTHOR
262              
263             Denis BAURAIN <denis.baurain@uliege.be>
264              
265             =head1 COPYRIGHT AND LICENSE
266              
267             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
268              
269             This is free software; you can redistribute it and/or modify it under
270             the same terms as the Perl 5 programming language system itself.
271              
272             =cut