File Coverage

Bio/SeqIO/swissdriver.pm

Criterion	Covered	Total	%
statement	72	79	91.1
branch	32	50	64.0
condition	6	10	60.0
subroutine	8	9	88.8
pod	3	3	100.0
total	121	151	80.1

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::SeqIO::swissdriver
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Bioperl project bioperl-l(at)bioperl.org
7							#
8							# Copyright Chris Fields and contributors see AUTHORS section
9							#
10							# You may distribute this module under the same terms as perl itself
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::SeqIO::swissdriver - SwissProt/UniProt handler-based push parser
17
18							=head1 SYNOPSIS
19
20							#It is probably best not to use this object directly, but
21							#rather go through the SeqIO handler:
22
23							$stream = Bio::SeqIO->new(-file => $filename,
24							-format => 'swissdriver');
25
26							while ( my $seq = $stream->next_seq() ) {
27							# do something with $seq
28							}
29
30							=head1 DESCRIPTION
31
32							This object can transform Bio::Seq objects to and from UniProt flat file
33							databases. The key difference between this parser and the tried-and-true
34							Bio::SeqIO::swiss parser is this version separates the parsing and data
35							manipulation into a 'driver' method (next_seq) and separate object handlers
36							which deal with the data passed to it.
37
38							=head2 The Driver
39
40							The main purpose of the driver routine, in this case next_seq(), is to carve out
41							the data into meaningful chunks which are passed along to relevant handlers (see
42							below).
43
44							Each chunk of data in the has a NAME tag attached to it, similar to that for XML
45							parsing. This designates the type of data passed (annotation type or seqfeature)
46							and the handler to be called for processing the data.
47
48							=head1 FEEDBACK
49
50							=head2 Mailing Lists
51
52							User feedback is an integral part of the evolution of this and other
53							Bioperl modules. Send your comments and suggestions preferably to one
54							of the Bioperl mailing lists. Your participation is much appreciated.
55
56							bioperl-l@bioperl.org - General discussion
57							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
58
59							=head2 Support
60
61							Please direct usage questions or support issues to the mailing list:
62
63							I
64
65							rather than to the module maintainer directly. Many experienced and
66							reponsive experts will be able look at the problem and quickly
67							address it. Please include a thorough description of the problem
68							with code and data examples if at all possible.
69
70							=head2 Reporting Bugs
71
72							Report bugs to the Bioperl bug tracking system to help us keep track
73							the bugs and their resolution. Bug reports can be submitted via the web:
74
75							https://github.com/bioperl/bioperl-live/issues
76
77							=head1 AUTHOR - Bioperl Project
78
79							bioperl-l at bioperl.org
80
81							=head1 APPENDIX
82
83							The rest of the documentation details each of the object
84							methods. Internal methods are usually preceded with a _
85
86							=cut
87
88							# POD is at the end of the module
89
90							# Let the code begin...
91
92							# Let the code begin...
93
94							package Bio::SeqIO::swissdriver;
95	1			1		14	use vars qw(%FTQUAL_NO_QUOTE);
	1					4
	1					92
96	1			1		10	use strict;
	1					3
	1					49
97	1			1		8	use Bio::SeqIO::Handler::GenericRichSeqHandler;
	1					3
	1					44
98	1			1		8	use Data::Dumper;
	1					4
	1					94
99
100	1			1		9	use base qw(Bio::SeqIO);
	1					5
	1					1622
101
102							# signals to process what's in the hash prior to next round, maps ann => names
103							my %SEC = (
104							OC => 'CLASSIFICATION',
105							OH => 'HOST', # not currently handled, bundled with organism data for now
106							OG => 'ORGANELLE',
107							OX => 'CROSSREF',
108							RA => 'AUTHORS',
109							RC => 'COMMENT',
110							RG => 'CONSRTM',
111							RP => 'POSITION',
112							RX => 'CROSSREF',
113							RT => 'TITLE',
114							RL => 'JOURNAL',
115							AS => 'ASSEMBLYINFO', # Third party annotation
116							'//' => 'RECORDEND'
117							);
118
119							# add specialized delimiters here for easier postprocessing
120							my %DELIM = (
121							CC => "\n",
122							DR => "\n",
123							DT => "\n",
124							);
125
126							sub _initialize {
127	9			9		42	my($self,@args) = @_;
128
129	9					63	$self->SUPER::_initialize(@args);
130	9					56	my $handler = $self->_rearrange([qw(HANDLER)],@args);
131							# hash for functions for decoding keys.
132	9	50				65	$handler ? $self->seqhandler($handler) :
133							$self->seqhandler(Bio::SeqIO::Handler::GenericRichSeqHandler->new(
134							-format => 'swiss',
135							-verbose => $self->verbose,
136							-builder => $self->sequence_builder
137							));
138	9	50				62	if( ! defined $self->sequence_factory ) {
139	9					36	$self->sequence_factory(Bio::Seq::SeqFactory->new
140							(-verbose => $self->verbose(),
141							-type => 'Bio::Seq::RichSeq'));
142							}
143							}
144
145							=head2 next_seq
146
147							Title : next_seq
148							Usage : $seq = $stream->next_seq()
149							Function: returns the next sequence in the stream
150							Returns : Bio::Seq object
151							Args : none
152
153							=cut
154
155							sub next_seq {
156	19			19	1	4195	my $self = shift;
157	19					95	my $hobj = $self->seqhandler;
158	19					141	local($/) = "\n";
159							# these contain values that need to carry over each round
160	19					69	my ($featkey, $qual, $annkey, $seqdata, $location);
161	19					68	my $lastann = '';
162	19					53	my $ct = 0;
163							# main parser
164							PARSER:
165	19					154	while(defined(my $line = $self->_readline)) {
166	1866					3459	chomp $line;
167	1866					7204	my ($ann, $data) = split(m{\s+}, $line, 2);
168	1866	100				3947	if ($ann) {
169	1848	100				2927	if ($ann eq 'FT') {
170							# sequence features
171	114	100				679	if ($data =~ m{^(\w+)\s+([\d\?\<]+)\s+([\d\?\>]+)(?:\s+?(\S.*))?}ox) {
		50
172							# has location data and desc
173	108	50				298	if ($seqdata) {
174	108					423	$hobj->data_handler($seqdata);
175	108					312	$seqdata = ();
176							}
177	108					751	($seqdata->{FEATURE_KEY}, my $loc1, my $loc2, $data) = ($1, $2, $3, $4);
178	108					251	$qual = 'description';
179	108					310	$seqdata->{$qual} = $data;
180	108					272	$seqdata->{NAME} = $ann;
181	108	50				439	$seqdata->{LOCATION} = "$loc1..$loc2" if defined $loc1;
182	108					578	next PARSER;
183							} elsif ($data =~ m{^\s+/([^=]+)(?:=(.+))?}ox) {
184							# has qualifer
185	0		0			0	($qual, $data) = ($1, $2 \|\| '');
186							$ct = ($seqdata->{$qual}) ?
187	0	0				0	((ref($seqdata->{$qual})) ? scalar(@{ $seqdata->{$qual} }) : 1)
	0	0				0
188							: 0 ;
189							}
190	6					46	$data =~ s{\.$}{};
191	6	50				35	if ($ct == 0) {
192	6	50				60	$seqdata->{$qual} .= ($seqdata->{$qual}) ?
193							' '.$data : $data;
194							} else {
195	0	0				0	if (!ref($seqdata->{$qual})) {
196	0					0	$seqdata->{$qual} = [$seqdata->{$qual}];
197							}
198							($seqdata->{$qual}->[$ct]) ?
199							($seqdata->{$qual}->[$ct] .= ' '.$data) :
200	0	0				0	($seqdata->{$qual}->[$ct] .= $data);
201							}
202							} else {
203							# simple annotations
204	1734	100				2787	if ($ann ne $lastann) {
205	905	100	100			2652	if (!$SEC{$ann} && $seqdata) {
206	263					1212	$hobj->data_handler($seqdata);
207							# can't use undef here; it can lead to subtle mem leaks
208	263					1131	$seqdata = ();
209							}
210							$annkey = (!$SEC{$ann}) ? 'DATA' : # primary data
211	905	100				2130	$SEC{$ann};
212	905	100				1998	$seqdata->{'NAME'} = $ann if !$SEC{$ann};
213							}
214	1734	50				2775	last PARSER if $ann eq '//';
215	1734	100				2548	next PARSER if $ann eq 'SQ';
216	1717		100			4089	my $delim = $DELIM{$ann} \|\| ' ';
217	1717	100				5004	$seqdata->{$annkey} .= ($seqdata->{$annkey}) ?
218							$delim.$data : $data;
219	1717					5138	$lastann = $ann;
220							}
221							} else {
222							# this should only be sequence (fingers crossed!)
223							SEQUENCE:
224	18					74	while (defined ($line = $self->_readline)) {
225	116	100				285	if (index($line, '//') == 0) {
226	17					59	$data =~ tr{0-9 \n}{}d;
227	17					60	$seqdata->{DATA} = $data;
228							#$self->debug(Dumper($seqdata));
229	17					72	$hobj->data_handler($seqdata);
230	17					46	$seqdata = ();
231	17					75	last PARSER;
232							} else {
233	99					244	$data .= $line;
234	99					233	$line = undef;
235							}
236							}
237							}
238							}
239							# some files have no // for the last file; this catches the last bit o' data
240	19	50				85	$hobj->data_handler($seqdata) if $seqdata;
241	19					106	return $hobj->build_sequence;
242							}
243
244							=head2 write_seq
245
246							Title : write_seq
247							Usage : $stream->write_seq($seq)
248							Function: writes the $seq object (must be seq) to the stream
249							Returns : 1 for success and 0 for error
250							Args : array of 1 to n Bio::SeqI objects
251
252							=cut
253
254							sub write_seq {
255	0			0	1	0	shift->throw("Use Bio::SeqIO::swiss write_seq() for output");
256							# maybe make a Writer class as well????
257							}
258
259							=head2 seqhandler
260
261							Title : seqhandler
262							Usage : $stream->seqhandler($handler)
263							Function: Get/Set the Bio::Seq::HandlerBaseI object
264							Returns : Bio::Seq::HandlerBaseI
265							Args : Bio::Seq::HandlerBaseI
266
267							=cut
268
269							sub seqhandler {
270	28			28	1	123	my ($self, $handler) = @_;
271	28	100				120	if ($handler) {
272	9	50	33			93	$self->throw("Not a Bio::HandlerBaseI") unless
273							ref($handler) && $handler->isa("Bio::HandlerBaseI");
274	9					50	$self->{'_seqhandler'} = $handler;
275							}
276	28					90	return $self->{'_seqhandler'};
277							}
278
279							1;
280
281							__END__