File Coverage

Bio/SeqIO/embldriver.pm
Criterion Covered Total %
statement 81 91 89.0
branch 49 58 84.4
condition 11 17 64.7
subroutine 8 10 80.0
pod 3 4 75.0
total 152 180 84.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::embldriver
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqIO::embldriver - EMBL sequence input/output stream
17              
18             =head1 SYNOPSIS
19              
20             It is probably best not to use this object directly, but
21             rather go through the SeqIO handler system. Go:
22              
23             $stream = Bio::SeqIO->new(-file => $filename, -format => 'embldriver');
24              
25             while ( (my $seq = $stream->next_seq()) ) {
26             # do something with $seq
27             }
28              
29             =head1 DESCRIPTION
30              
31             This object can transform Bio::Seq objects to and from EMBL flat
32             file databases.
33              
34             There is a lot of flexibility here about how to dump things which
35             should be documented more fully.
36              
37             There should be a common object that this and Genbank share (probably
38             with Swissprot). Too much of the magic is identical.
39              
40             =head2 Optional functions
41              
42             =over 3
43              
44             =item _show_dna()
45              
46             (output only) shows the dna or not
47              
48             =item _post_sort()
49              
50             (output only) provides a sorting func which is applied to the FTHelpers
51             before printing
52              
53             =item _id_generation_func()
54              
55             This is function which is called as
56              
57             print "ID ", $func($annseq), "\n";
58              
59             To generate the ID line. If it is not there, it generates a sensible ID
60             line using a number of tools.
61              
62             If you want to output annotations in EMBL format they need to be
63             stored in a Bio::Annotation::Collection object which is accessible
64             through the Bio::SeqI interface method L.
65              
66             The following are the names of the keys which are polled from a
67             L object.
68              
69             reference - Should contain Bio::Annotation::Reference objects
70             comment - Should contain Bio::Annotation::Comment objects
71             dblink - Should contain Bio::Annotation::DBLink objects
72              
73             =back
74              
75             =head1 FEEDBACK
76              
77             =head2 Mailing Lists
78              
79             User feedback is an integral part of the evolution of this and other
80             Bioperl modules. Send your comments and suggestions preferably to one
81             of the Bioperl mailing lists. Your participation is much appreciated.
82              
83             bioperl-l@bioperl.org - General discussion
84             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
85              
86             =head2 Support
87              
88             Please direct usage questions or support issues to the mailing list:
89              
90             I
91              
92             rather than to the module maintainer directly. Many experienced and
93             reponsive experts will be able look at the problem and quickly
94             address it. Please include a thorough description of the problem
95             with code and data examples if at all possible.
96              
97             =head2 Reporting Bugs
98              
99             Report bugs to the Bioperl bug tracking system to help us keep track
100             the bugs and their resolution. Bug reports can be submitted via
101             the web:
102              
103             https://github.com/bioperl/bioperl-live/issues
104              
105             =head1 AUTHOR - Ewan Birney
106              
107             Email birney@ebi.ac.uk
108              
109             =head1 APPENDIX
110              
111             The rest of the documentation details each of the object
112             methods. Internal methods are usually preceded with a _
113              
114             =cut
115              
116             # Let the code begin...
117              
118             package Bio::SeqIO::embldriver;
119 1     1   13 use vars qw(%FTQUAL_NO_QUOTE);
  1         4  
  1         143  
120 1     1   7 use strict;
  1         3  
  1         45  
121 1     1   8 use Bio::SeqIO::Handler::GenericRichSeqHandler;
  1         4  
  1         40  
122 1     1   5 use Data::Dumper;
  1         3  
  1         95  
123              
124 1     1   8 use base qw(Bio::SeqIO);
  1         1  
  1         1893  
125              
126             my %FTQUAL_NO_QUOTE = map {$_ => 1} qw(
127             anticodon citation
128             codon codon_start
129             cons_splice direction
130             evidence label
131             mod_base number
132             rpt_type rpt_unit
133             transl_except transl_table
134             usedin
135             LOCATION
136             );
137              
138             my %DATA_KEY = (
139             ID => 'ID',
140             AC => 'ACCESSION',
141             DT => 'DATE',
142             DE => 'DESCRIPTION',
143             KW => 'KEYWORDS',
144             OS => 'SOURCE',
145             OC => 'CLASSIFICATION',
146             OG => 'ORGANELLE',
147             RN => 'REFERENCE',
148             RA => 'AUTHORS',
149             RC => 'COMMENT',
150             RG => 'CONSRTM',
151             RP => 'POSITION',
152             RX => 'CROSSREF',
153             RT => 'TITLE',
154             RL => 'LOCATION',
155             XX => 'SPACER',
156             FH => 'FEATHEADER',
157             FT => 'FEATURES',
158             AH => 'TPA_HEADER', # Third party annotation
159             AS => 'TPA_DATA', # Third party annotation
160             DR => 'DBLINK',
161             CC => 'COMMENT',
162             CO => 'CO',
163             CON => 'CON',
164             WGS => 'WGS',
165             ANN => 'ANN',
166             TPA => 'TPA',
167             SQ => 'SEQUENCE',
168             );
169              
170             my %SEC = (
171             OC => 'CLASSIFICATION',
172             OH => 'HOST', # not currently handled, bundled with organism data for now
173             OG => 'ORGANELLE',
174             OX => 'CROSSREF',
175             RA => 'AUTHORS',
176             RC => 'COMMENT',
177             RG => 'CONSRTM',
178             RP => 'POSITION',
179             RX => 'CROSSREF',
180             RT => 'TITLE',
181             RL => 'JOURNAL',
182             AS => 'ASSEMBLYINFO', # Third party annotation
183             );
184              
185             my %DELIM = (
186             #CC => "\n",
187             #DR => "\n",
188             #DT => "\n",
189             );
190              
191             # signals to process what's in the hash prior to next round
192             # these should be changed to map secondary data
193             my %PRIMARY = map {$_ => 1} qw(ID AC DT DE SV KW OS RN AH DR FH CC SQ FT WGS CON ANN TPA //);
194              
195             sub _initialize {
196 10     10   38 my($self,@args) = @_;
197              
198 10         60 $self->SUPER::_initialize(@args);
199 10         63 my $handler = $self->_rearrange([qw(HANDLER)],@args);
200             # hash for functions for decoding keys.
201 10 50       90 $handler ? $self->seqhandler($handler) :
202             $self->seqhandler(Bio::SeqIO::Handler::GenericRichSeqHandler->new(
203             -format => 'embl',
204             -verbose => $self->verbose,
205             -builder => $self->sequence_builder
206             ));
207             #
208 10 50       55 if( ! defined $self->sequence_factory ) {
209 10         49 $self->sequence_factory(Bio::Seq::SeqFactory->new
210             (-verbose => $self->verbose(),
211             -type => 'Bio::Seq::RichSeq'));
212             }
213             }
214              
215             =head2 next_seq
216              
217             Title : next_seq
218             Usage : $seq = $stream->next_seq()
219             Function: returns the next sequence in the stream
220             Returns : Bio::Seq object
221             Args :
222              
223             =cut
224              
225             sub next_seq {
226 10     10 1 35 my $self = shift;
227 10         39 my $hobj = $self->seqhandler;
228 10         80 local($/) = "\n";
229 10         40 my ($featkey, $qual, $annkey, $delim, $seqdata);
230 10         19 my $lastann = '';
231 10         23 my $ct = 0;
232             PARSER:
233 10         57 while(defined(my $line = $self->_readline)) {
234 2767 100       6937 next PARSER if $line =~ m{^\s*$};
235 2766         2703 chomp $line;
236 2766         8696 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
237 2766 100 100     11388 next PARSER if ($ann eq 'XX' || $ann eq 'FH');
238 2602 100       3234 if ($ann) {
239 2592   100     3887 $data ||='';
240 2592 100       3337 if ($ann eq 'FT') {
241             # seqfeatures
242 1752 100       7120 if ($data =~ m{^(\S+)\s+([^\n]+)}) {
    100          
243 193 50       756 $hobj->data_handler($seqdata) if $seqdata;
244 193         253 $seqdata = ();
245 193         1190 ($seqdata->{FEATURE_KEY}, $data) = ($1, $2);
246 193         354 $seqdata->{NAME} = $ann;
247 193         277 $qual = 'LOCATION';
248             } elsif ($data =~ m{^\s+/([^=]+)=?(.+)?}) {
249 935   50     3052 ($qual, $data) = ($1, $2 ||'');
250             $ct = (exists $seqdata->{$qual}) ?
251 935 100       1819 ((ref($seqdata->{$qual})) ? scalar(@{ $seqdata->{$qual} }) : 1)
  32 100       101  
252             : 0 ;
253             }
254 1752         3234 $data =~ s{^\s+}{};
255 1752         2168 $data =~ tr{"}{}d; # we don't care about quotes yet...
256 1752 100       3045 my $delim = ($FTQUAL_NO_QUOTE{$qual}) ? '' : ' ';
257 1752 100       2272 if ($ct == 0) {
258 1601 100       7597 $seqdata->{$qual} .= ($seqdata->{$qual}) ?
259             $delim.$data :
260             $data;
261             } else {
262 151 100       343 if (!ref($seqdata->{$qual})) {
263 117         241 $seqdata->{$qual} = [$seqdata->{$qual}];
264             }
265             (exists $seqdata->{$qual}->[$ct]) ?
266             (($seqdata->{$qual}->[$ct]) .= $delim.$data) :
267 151 100       1139 (($seqdata->{$qual}->[$ct]) .= $data);
268             }
269             } else {
270             # simple annotations
271 840         1265 $data =~ s{;$}{};
272 840 50       1530 last PARSER if $ann eq '//';
273 840 100       1354 if ($ann ne $lastann) {
274 454 100 100     1269 if (!$SEC{$ann} && $seqdata) {
275 141         488 $hobj->data_handler($seqdata);
276             # can't use undef here; it can lead to subtle mem leaks
277 141         205 $seqdata = ();
278             }
279             $annkey = (!$SEC{$ann}) ? 'DATA' : # primary data
280 454 100       1331 $SEC{$ann};
281 454 100       1352 $seqdata->{'NAME'} = $ann if !$SEC{$ann};
282             }
283            
284             # toss the data for SQ lines; this needs to be done after the
285             # call to the data handler
286            
287 840 100       1432 next PARSER if $ann eq 'SQ';
288 829   50     2527 my $delim = $DELIM{$ann} || ' ';
289 829 100       2950 $seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
290             $delim.$data : $data;
291 829         2699 $lastann = $ann;
292             }
293             } else {
294             # this should only be sequence (fingers crossed!)
295             SEQUENCE:
296 10         59 while (defined ($line = $self->_readline)) {
297 2477 100       4583 if (index($line, '//') == 0) {
298 9         510 $data =~ tr{0-9 \n}{}d;
299 9         187 $seqdata->{DATA} = $data;
300             #$self->debug(Dumper($seqdata));
301 9         60 $hobj->data_handler($seqdata);
302 9         16 $seqdata = ();
303 9         46 last PARSER;
304             } else {
305 2468         3418 $data .= $line;
306 2468         5671 $line = undef;
307             }
308             }
309             }
310             }
311 10 100       41 $hobj->data_handler($seqdata) if $seqdata;
312 10         17 $seqdata = ();
313 10         43 return $hobj->build_sequence;
314             }
315              
316             sub next_chunk {
317 0     0 0 0 my $self = shift;
318 0         0 my $ct = 0;
319             PARSER:
320 0         0 while(defined(my $line = $self->_readline)) {
321 0 0       0 next if $line =~ m{^\s*$};
322 0         0 chomp $line;
323 0         0 my ($ann,$data) = split m{\s{2,3}}, $line , 2;
324 0   0     0 $data ||= '';
325 0         0 $self->debug("Ann: [$ann]\n\tData: [$data]\n");
326 0 0       0 last PARSER if $ann =~ m{//};
327             }
328             }
329              
330             =head2 write_seq
331              
332             Title : write_seq
333             Usage : $stream->write_seq($seq)
334             Function: writes the $seq object (must be seq) to the stream
335             Returns : 1 for success and 0 for error
336             Args : array of 1 to n Bio::SeqI objects
337              
338             =cut
339              
340             sub write_seq {
341 0     0 1 0 shift->throw("Use Bio::SeqIO::embl for output");
342             # maybe make a Writer class as well????
343             }
344              
345             =head2 seqhandler
346              
347             Title : seqhandler
348             Usage : $stream->seqhandler($handler)
349             Function: Get/Set the Bio::Seq::HandlerBaseI object
350             Returns : Bio::Seq::HandlerBaseI
351             Args : Bio::Seq::HandlerBaseI
352              
353             =cut
354              
355             sub seqhandler {
356 20     20 1 39 my ($self, $handler) = @_;
357 20 100       69 if ($handler) {
358 10 50 33     96 $self->throw("Not a Bio::HandlerBaseI") unless
359             ref($handler) && $handler->isa("Bio::HandlerBaseI");
360 10         51 $self->{'_seqhandler'} = $handler;
361             }
362 20         51 return $self->{'_seqhandler'};
363             }
364              
365             1;
366              
367             __END__