File Coverage

Bio/SeqIO/swissdriver.pm
Criterion Covered Total %
statement 72 79 91.1
branch 32 50 64.0
condition 6 10 60.0
subroutine 8 9 88.8
pod 3 3 100.0
total 121 151 80.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::swissdriver
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Bioperl project bioperl-l(at)bioperl.org
7             #
8             # Copyright Chris Fields and contributors see AUTHORS section
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqIO::swissdriver - SwissProt/UniProt handler-based push parser
17              
18             =head1 SYNOPSIS
19              
20             #It is probably best not to use this object directly, but
21             #rather go through the SeqIO handler:
22              
23             $stream = Bio::SeqIO->new(-file => $filename,
24             -format => 'swissdriver');
25              
26             while ( my $seq = $stream->next_seq() ) {
27             # do something with $seq
28             }
29              
30             =head1 DESCRIPTION
31              
32             This object can transform Bio::Seq objects to and from UniProt flat file
33             databases. The key difference between this parser and the tried-and-true
34             Bio::SeqIO::swiss parser is this version separates the parsing and data
35             manipulation into a 'driver' method (next_seq) and separate object handlers
36             which deal with the data passed to it.
37              
38             =head2 The Driver
39              
40             The main purpose of the driver routine, in this case next_seq(), is to carve out
41             the data into meaningful chunks which are passed along to relevant handlers (see
42             below).
43              
44             Each chunk of data in the has a NAME tag attached to it, similar to that for XML
45             parsing. This designates the type of data passed (annotation type or seqfeature)
46             and the handler to be called for processing the data.
47              
48             =head1 FEEDBACK
49              
50             =head2 Mailing Lists
51              
52             User feedback is an integral part of the evolution of this and other
53             Bioperl modules. Send your comments and suggestions preferably to one
54             of the Bioperl mailing lists. Your participation is much appreciated.
55              
56             bioperl-l@bioperl.org - General discussion
57             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
58              
59             =head2 Support
60              
61             Please direct usage questions or support issues to the mailing list:
62              
63             I
64              
65             rather than to the module maintainer directly. Many experienced and
66             reponsive experts will be able look at the problem and quickly
67             address it. Please include a thorough description of the problem
68             with code and data examples if at all possible.
69              
70             =head2 Reporting Bugs
71              
72             Report bugs to the Bioperl bug tracking system to help us keep track
73             the bugs and their resolution. Bug reports can be submitted via the web:
74              
75             https://github.com/bioperl/bioperl-live/issues
76              
77             =head1 AUTHOR - Bioperl Project
78              
79             bioperl-l at bioperl.org
80              
81             =head1 APPENDIX
82              
83             The rest of the documentation details each of the object
84             methods. Internal methods are usually preceded with a _
85              
86             =cut
87              
88             # POD is at the end of the module
89              
90             # Let the code begin...
91              
92             # Let the code begin...
93              
94             package Bio::SeqIO::swissdriver;
95 1     1   14 use vars qw(%FTQUAL_NO_QUOTE);
  1         4  
  1         92  
96 1     1   10 use strict;
  1         3  
  1         49  
97 1     1   8 use Bio::SeqIO::Handler::GenericRichSeqHandler;
  1         3  
  1         44  
98 1     1   8 use Data::Dumper;
  1         4  
  1         94  
99              
100 1     1   9 use base qw(Bio::SeqIO);
  1         5  
  1         1622  
101              
102             # signals to process what's in the hash prior to next round, maps ann => names
103             my %SEC = (
104             OC => 'CLASSIFICATION',
105             OH => 'HOST', # not currently handled, bundled with organism data for now
106             OG => 'ORGANELLE',
107             OX => 'CROSSREF',
108             RA => 'AUTHORS',
109             RC => 'COMMENT',
110             RG => 'CONSRTM',
111             RP => 'POSITION',
112             RX => 'CROSSREF',
113             RT => 'TITLE',
114             RL => 'JOURNAL',
115             AS => 'ASSEMBLYINFO', # Third party annotation
116             '//' => 'RECORDEND'
117             );
118              
119             # add specialized delimiters here for easier postprocessing
120             my %DELIM = (
121             CC => "\n",
122             DR => "\n",
123             DT => "\n",
124             );
125              
126             sub _initialize {
127 9     9   42 my($self,@args) = @_;
128              
129 9         63 $self->SUPER::_initialize(@args);
130 9         56 my $handler = $self->_rearrange([qw(HANDLER)],@args);
131             # hash for functions for decoding keys.
132 9 50       65 $handler ? $self->seqhandler($handler) :
133             $self->seqhandler(Bio::SeqIO::Handler::GenericRichSeqHandler->new(
134             -format => 'swiss',
135             -verbose => $self->verbose,
136             -builder => $self->sequence_builder
137             ));
138 9 50       62 if( ! defined $self->sequence_factory ) {
139 9         36 $self->sequence_factory(Bio::Seq::SeqFactory->new
140             (-verbose => $self->verbose(),
141             -type => 'Bio::Seq::RichSeq'));
142             }
143             }
144              
145             =head2 next_seq
146              
147             Title : next_seq
148             Usage : $seq = $stream->next_seq()
149             Function: returns the next sequence in the stream
150             Returns : Bio::Seq object
151             Args : none
152              
153             =cut
154              
155             sub next_seq {
156 19     19 1 4195 my $self = shift;
157 19         95 my $hobj = $self->seqhandler;
158 19         141 local($/) = "\n";
159             # these contain values that need to carry over each round
160 19         69 my ($featkey, $qual, $annkey, $seqdata, $location);
161 19         68 my $lastann = '';
162 19         53 my $ct = 0;
163             # main parser
164             PARSER:
165 19         154 while(defined(my $line = $self->_readline)) {
166 1866         3459 chomp $line;
167 1866         7204 my ($ann, $data) = split(m{\s+}, $line, 2);
168 1866 100       3947 if ($ann) {
169 1848 100       2927 if ($ann eq 'FT') {
170             # sequence features
171 114 100       679 if ($data =~ m{^(\w+)\s+([\d\?\<]+)\s+([\d\?\>]+)(?:\s+?(\S.*))?}ox) {
    50          
172             # has location data and desc
173 108 50       298 if ($seqdata) {
174 108         423 $hobj->data_handler($seqdata);
175 108         312 $seqdata = ();
176             }
177 108         751 ($seqdata->{FEATURE_KEY}, my $loc1, my $loc2, $data) = ($1, $2, $3, $4);
178 108         251 $qual = 'description';
179 108         310 $seqdata->{$qual} = $data;
180 108         272 $seqdata->{NAME} = $ann;
181 108 50       439 $seqdata->{LOCATION} = "$loc1..$loc2" if defined $loc1;
182 108         578 next PARSER;
183             } elsif ($data =~ m{^\s+/([^=]+)(?:=(.+))?}ox) {
184             # has qualifer
185 0   0     0 ($qual, $data) = ($1, $2 || '');
186             $ct = ($seqdata->{$qual}) ?
187 0 0       0 ((ref($seqdata->{$qual})) ? scalar(@{ $seqdata->{$qual} }) : 1)
  0 0       0  
188             : 0 ;
189             }
190 6         46 $data =~ s{\.$}{};
191 6 50       35 if ($ct == 0) {
192 6 50       60 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
193             ' '.$data : $data;
194             } else {
195 0 0       0 if (!ref($seqdata->{$qual})) {
196 0         0 $seqdata->{$qual} = [$seqdata->{$qual}];
197             }
198             ($seqdata->{$qual}->[$ct]) ?
199             ($seqdata->{$qual}->[$ct] .= ' '.$data) :
200 0 0       0 ($seqdata->{$qual}->[$ct] .= $data);
201             }
202             } else {
203             # simple annotations
204 1734 100       2787 if ($ann ne $lastann) {
205 905 100 100     2652 if (!$SEC{$ann} && $seqdata) {
206 263         1212 $hobj->data_handler($seqdata);
207             # can't use undef here; it can lead to subtle mem leaks
208 263         1131 $seqdata = ();
209             }
210             $annkey = (!$SEC{$ann}) ? 'DATA' : # primary data
211 905 100       2130 $SEC{$ann};
212 905 100       1998 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
213             }
214 1734 50       2775 last PARSER if $ann eq '//';
215 1734 100       2548 next PARSER if $ann eq 'SQ';
216 1717   100     4089 my $delim = $DELIM{$ann} || ' ';
217 1717 100       5004 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
218             $delim.$data : $data;
219 1717         5138 $lastann = $ann;
220             }
221             } else {
222             # this should only be sequence (fingers crossed!)
223             SEQUENCE:
224 18         74 while (defined ($line = $self->_readline)) {
225 116 100       285 if (index($line, '//') == 0) {
226 17         59 $data =~ tr{0-9 \n}{}d;
227 17         60 $seqdata->{DATA} = $data;
228             #$self->debug(Dumper($seqdata));
229 17         72 $hobj->data_handler($seqdata);
230 17         46 $seqdata = ();
231 17         75 last PARSER;
232             } else {
233 99         244 $data .= $line;
234 99         233 $line = undef;
235             }
236             }
237             }
238             }
239             # some files have no // for the last file; this catches the last bit o' data
240 19 50       85 $hobj->data_handler($seqdata) if $seqdata;
241 19         106 return $hobj->build_sequence;
242             }
243              
244             =head2 write_seq
245              
246             Title : write_seq
247             Usage : $stream->write_seq($seq)
248             Function: writes the $seq object (must be seq) to the stream
249             Returns : 1 for success and 0 for error
250             Args : array of 1 to n Bio::SeqI objects
251              
252             =cut
253              
254             sub write_seq {
255 0     0 1 0 shift->throw("Use Bio::SeqIO::swiss write_seq() for output");
256             # maybe make a Writer class as well????
257             }
258              
259             =head2 seqhandler
260              
261             Title : seqhandler
262             Usage : $stream->seqhandler($handler)
263             Function: Get/Set the Bio::Seq::HandlerBaseI object
264             Returns : Bio::Seq::HandlerBaseI
265             Args : Bio::Seq::HandlerBaseI
266              
267             =cut
268              
269             sub seqhandler {
270 28     28 1 123 my ($self, $handler) = @_;
271 28 100       120 if ($handler) {
272 9 50 33     93 $self->throw("Not a Bio::HandlerBaseI") unless
273             ref($handler) && $handler->isa("Bio::HandlerBaseI");
274 9         50 $self->{'_seqhandler'} = $handler;
275             }
276 28         90 return $self->{'_seqhandler'};
277             }
278              
279             1;
280              
281             __END__