File Coverage

Bio/SearchIO/cross_match.pm
Criterion Covered Total %
statement 52 88 59.0
branch 20 48 41.6
condition 7 15 46.6
subroutine 7 9 77.7
pod 2 2 100.0
total 88 162 54.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SearchIO::cross_match
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Shin Leong
7             #
8             # Copyright Shin Leong
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SearchIO::cross_match - CrossMatch-specific subclass of Bio::SearchIO
17              
18             =head1 SYNOPSIS
19              
20             # Working with iterations (CrossMatch results)
21             my $searchIO = Bio::SearchIO->new( -format => 'cross_match',
22             -file => "$file.screen.out" )
23             while(my $r = $searchIO->next_result) {
24             while(my $hit = $r->next_hit) {
25             while(my $hsp = $hit->next_hsp) {
26             #Do the processing here.
27             }
28             }
29             }
30              
31             See L for details about working with Bio::SearchIO.
32              
33             =head1 DESCRIPTION
34              
35             This object is a subclass of Bio::SearchIO
36             and provides some operations that facilitate working with CrossMatch
37             and CrossMatch results.
38              
39             For general information about working with Results, see
40             L.
41              
42             =head1 FEEDBACK
43              
44             =head2 Mailing Lists
45              
46             User feedback is an integral part of the evolution of this and other
47             Bioperl modules. Send your comments and suggestions preferably to
48             the Bioperl mailing list. Your participation is much appreciated.
49              
50             bioperl-l@bioperl.org - General discussion
51             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52              
53             =head2 Support
54              
55             Please direct usage questions or support issues to the mailing list:
56              
57             I
58              
59             rather than to the module maintainer directly. Many experienced and
60             reponsive experts will be able look at the problem and quickly
61             address it. Please include a thorough description of the problem
62             with code and data examples if at all possible.
63              
64             =head2 Reporting Bugs
65              
66             Report bugs to the Bioperl bug tracking system to help us keep track
67             of the bugs and their resolution. Bug reports can be submitted via the
68             web:
69              
70             https://github.com/bioperl/bioperl-live/issues
71              
72             =head1 AUTHOR - Shin Leong
73              
74             Email sleong@watson.wustl.edu
75              
76             =head1 CONTRIBUTORS
77              
78             Additional contributors names and emails here
79              
80             =head1 APPENDIX
81              
82             The rest of the documentation details each of the object methods.
83             Internal methods are usually preceded with a _
84              
85             =cut
86              
87              
88             # Let the code begin...
89              
90             package Bio::SearchIO::cross_match;
91 1     1   343 use Bio::Search::Result::CrossMatchResult;
  1         2  
  1         25  
92 1     1   4 use Bio::SearchIO;
  1         1  
  1         33  
93 1     1   621 use Bio::Search::Hit::GenericHit;
  1         2  
  1         32  
94 1     1   526 use Bio::Search::HSP::GenericHSP;
  1         2  
  1         80  
95 1     1   5 use base qw(Bio::SearchIO);
  1         1  
  1         787  
96              
97             =head2 next_result
98              
99             Title : next_result
100             Usage : $result = stream->next_result
101             Function: Reads the next ResultI object from the stream and returns it.
102              
103             Certain driver modules may encounter entries in the stream that
104             are either misformatted or that use syntax not yet understood
105             by the driver. If such an incident is recoverable, e.g., by
106             dismissing a feature of a feature table or some other non-mandatory
107             part of an entry, the driver will issue a warning. In the case
108             of a non-recoverable situation an exception will be thrown.
109             Do not assume that you can resume parsing the same stream after
110             catching the exception. Note that you can always turn recoverable
111             errors into exceptions by calling $stream->verbose(2) (see
112             Bio::Root::RootI POD page).
113             Returns : A Bio::Search::Result::ResultI object
114             Args : n/a
115              
116             See L
117              
118             =cut
119              
120             sub next_result {
121 1     1 1 7 my ($self) = @_;
122 1         1 my $start = 0;
123 1         5 while ( defined( $_ = $self->_readline ) ) {
124 41 50       54 return if ( $self->{'_end_document'} );
125 41 100 66     208 if (/^cross_match version\s+(.*?)$/) {
    100 100        
    50          
    100          
    100          
    50          
    50          
126 1         3 $self->{_algorithm_version} = $1;
127             }
128             elsif (/^Maximal single base matches/) {
129 1         2 $start = 1;
130             }
131             elsif (/^(\d+) matching entries/) {
132 0         0 $self->{'_end_document'} = 1;
133 0         0 return;
134             }
135             elsif ( ( $start || $self->{'_result_count'} ) && /^\s*(\d+)/xms ) {
136 1         2 $self->{'_result_count'}++;
137 1         3 return $self->_parse($_);
138             }
139             elsif ( !$self->{_parameters} ) {
140 1 50       13 if (/.*?\s+(\-.*?)$/) {
141 1         3 my $p = $1;
142 1         4 my @pp = split /\s+/, $p;
143 1         3 for ( my $i = 0 ; $i < @pp ; $i++ ) {
144 1 50       4 if ( $pp[$i] =~ /^\-/ ) {
145 1 50 33     3 if ( $pp[ $i + 1 ] && $pp[ $i + 1 ] !~ /^\-/ ) {
146 0         0 $self->{_parameters}->{ $pp[$i] } = $pp[ $i + 1 ];
147 0         0 $i++;
148             }
149             else {
150 1         6 $self->{_parameters}->{ $pp[$i] } = "";
151             }
152             }
153             }
154             }
155             }
156             elsif (/^Query file(s):\s+(.*?)$/) {
157 0         0 $self->{_query_name} = $1;
158             }
159             elsif (/^Subject file(s):\s+(.*?)$/) {
160 0         0 $self->{_subject_name} = $2;
161             }
162             }
163             }
164              
165              
166             =head2 _alignment
167              
168             Title : _alignment
169             Usage : private
170              
171             =cut
172              
173             sub _alignment {
174 0     0   0 my $self = shift;
175              
176             # C H_EO-aaa01PCR02 243 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 194
177             #
178             # 0284119_008.c1- 1 CCTCTGAATGGCTGAAGACCCCTCTGCCGAGGGAGGTTGGGGATTGTGGG 50
179             #
180             # C H_EO-aaa01PCR02 193 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 144
181             #
182             # 0284119_008.c1- 51 ACAAGGTCCCTTGGTGCTGATGGCCTGAAGGGGCCTGAGCTGTGGGCAGA 100
183             #
184             # C H_EO-aaa01PCR02 143 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 94
185             #
186             # 0284119_008.c1- 101 TGCAGTTTTCTGTGGGCTTGGGGAACCTCTCACGTTGCTGTGTCCTGGTG 150
187             #
188             # C H_EO-aaa01PCR02 93 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 44
189             #
190             # 0284119_008.c1- 151 AGCAGCCCGACCAATAAACCTGCTTTTCTAAAAGGATCTGTGTTTGATTG 200
191             #
192             # C H_EO-aaa01PCR02 43 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 9
193             #
194             # 0284119_008.c1- 201 TATTCTCTGAAGGCAGTTACATAGGGTTACAGAGG 235
195              
196             # LSF: Should be the blank line. Otherwise error.
197 0         0 my $blank = $self->_readline;
198 0 0       0 unless ( $blank =~ /^\s*$/ ) {
199 0         0 return;
200             }
201 0         0 my @data;
202             my @pad;
203 0         0 $count = 0;
204 0         0 while ( defined( $_ = $self->_readline ) ) {
205 0 0       0 $count = 0 if ( $count >= 3 );
206 0 0       0 next if (/^$/);
207 0 0       0 if (/^(C \S+.*?\d+ )(\S+) \d+$|^( \S+.*?\d+ )(\S+) \d+$$|^\s+$/) {
208 0         0 $count++;
209 0 0 0     0 if ( $1 || $3 ) {
210 0 0       0 $pad[$count] = $1 ? $1 : $3;
211 0 0       0 push @{ $data[$count] }, ( $2 ? $2 : $4 );
  0         0  
212             }
213             else {
214 0 0       0 if (/\s{$pad[0],$pad[0]}(.*?)$/) {
215 0         0 push @{ $data[$count] }, $1;
  0         0  
216             }
217             else {
218 0         0 $self->throw("Format error for the homology line [$_].");
219             }
220             }
221             }
222             else {
223 0         0 last;
224             }
225             }
226 0         0 return @data;
227             }
228              
229              
230             =head2 _parse
231              
232             Title : _parse
233             Usage : private
234              
235             =cut
236              
237             sub _parse {
238 1     1   2 my $self = shift;
239 1         1 my $line = shift;
240 1         1 my $is_alignment = 0;
241 1         1 my ( $hit_seq, $homology_seq, $query_seq );
242              
243             # 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
244             #OR
245             #ALIGNMENT 32 5.13 0.00 0.00 H_DO-0065PCR0005792_034a.b1-1 327 365 (165) C 1111547847_forward (0) 39 1
246 1         12 $line =~ s/^\s+|\s+$//g;
247 1         10 my @r = split /\s+/, $line;
248 1 50       3 if ( $r[0] eq "ALIGNMENT" ) {
249 0         0 $is_alignment = 1;
250 0         0 shift @r;
251 0         0 ( $hit_seq, $homology_seq, $query_seq ) = $self->_alignment();
252             }
253 1         1 my $subject_seq_id;
254 1         1 my $query_seq_id = $r[4];
255 1         1 my $query_start = $r[5];
256 1         1 my $query_end = $r[6];
257 1         0 my $is_complement = 0;
258 1         2 my $subject_start;
259             my $subject_end;
260              
261 1 50 33     4 if ( $r[8] eq "C" && $r[9] !~ /^\(\d+\)$/ ) {
262 0         0 $subject_seq_id = $r[9];
263 0         0 $is_complement = 1;
264 0         0 $subject_start = $r[11];
265 0         0 $subject_end = $r[12];
266             }
267             else {
268 1         1 $subject_seq_id = $r[8];
269 1         1 $subject_start = $r[9];
270 1         1 $subject_end = $r[10];
271             }
272 1 50       13 my $hit = Bio::Search::Hit::GenericHit->new(
    50          
    50          
273             -name => $subject_seq_id,
274             -hsps => [
275             Bio::Search::HSP::GenericHSP->new(
276             -query_name => $query_seq_id,
277             -query_start => $query_start,
278             -query_end => $query_end,
279             -hit_name => $subject_seq_id,
280             -hit_start => $subject_start,
281             -hit_end => $subject_end,
282             -query_length => 0,
283             -hit_length => 0,
284             -identical => $r[0],
285             -conserved => $r[0],
286             -query_seq => $query_seq
287             ? ( join "", @$query_seq )
288             : "", #query sequence portion of the HSP
289             -hit_seq => $hit_seq
290             ? ( join "", @$hit_seq )
291             : "", #hit sequence portion of the HSP
292             -homology_seq => $homology_seq
293             ? ( join "", @$homology_seq )
294             : "", #homology sequence for the HSP
295             #LSF: Need the direction, just to fool the GenericHSP module.
296             -algorithm => 'SW',
297             )
298             ],
299             );
300             my $result = Bio::Search::Result::CrossMatchResult->new(
301             -query_name => $self->{_query_name},
302             -query_accession => '',
303             -query_description => '',
304             -query_length => 0,
305             -database_name => $self->{_subject_name},
306             -database_letters => 0,
307             -database_entries => 0,
308             -parameters => $self->{_parameters},
309             -statistics => {},
310             -algorithm => 'cross_match',
311             -algorithm_version => $self->{_algorithm_version},
312 1         14 );
313 1         7 $result->add_hit($hit);
314 1         4 return $result;
315             }
316              
317              
318             =head2 result_count
319              
320             Title : result_count
321             Usage : $num = $stream->result_count;
322             Function: Gets the number of CrossMatch results that have been parsed.
323             Returns : integer
324             Args : none
325             Throws : none
326              
327             =cut
328              
329             sub result_count {
330 0     0 1   my $self = shift;
331 0           return $self->{'_result_count'};
332             }
333              
334              
335             1;
336             #$Header$