File Coverage

Bio/SeqIO/kegg.pm

Criterion	Covered	Total	%
statement	99	109	90.8
branch	14	28	50.0
condition	3	9	33.3
subroutine	10	11	90.9
pod	2	2	100.0
total	128	159	80.5

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::SeqIO::kegg
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Allen Day
7							#
8							# Copyright Allen Day
9							#
10							# You may distribute this module under the same terms as perl itself
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::SeqIO::kegg - KEGG sequence input/output stream
17
18							=head1 SYNOPSIS
19
20							# It is probably best not to use this object directly, but
21							# rather go through the SeqIO handler system. Go:
22
23							use Bio::SeqIO;
24
25							$stream = Bio::SeqIO->new(-file => $filename, -format => 'KEGG');
26
27							while ( my $seq = $stream->next_seq() ) {
28							# do something with $seq
29							}
30
31							=head1 DESCRIPTION
32
33							This class transforms KEGG gene records into Bio::Seq objects.
34
35							=head2 Mapping of record properties to object properties
36
37							This section is supposed to document which sections and properties of
38							a KEGG databank record end up where in the Bioperl object model. It
39							is far from complete and presently focuses only on those mappings
40							which may be non-obvious. $seq in the text refers to the
41							Bio::Seq::RichSeqI implementing object returned by the parser for each
42							record.
43
44							=over 4
45
46							=item 'ENTRY'
47
48							$seq->primary_id
49
50							=item 'NAME'
51
52							$seq->display_id
53
54							=item 'DEFINITION'
55
56							$seq->annotation->get_Annotations('description');
57
58							=item 'ORTHOLOG'
59
60							grep {$_->database eq 'KO'} $seq->annotation->get_Annotations('dblink')
61
62							=item 'CLASS'
63
64							grep {$_->database eq 'PATH'}
65							$seq->annotation->get_Annotations('dblink')
66
67							=item 'POSITION'
68
69							FIXME, NOT IMPLEMENTED
70
71							=item 'PATHWAY'
72
73							for my $pathway ( $seq->annotation->get_Annotations('pathway') ) {
74							#
75							}
76
77							=item 'DBLINKS'
78
79							$seq->annotation->get_Annotations('dblink')
80
81							=item 'CODON_USAGE'
82
83							FIXME, NOT IMPLEMENTED
84
85							=item 'AASEQ'
86
87							$seq->translate->seq
88
89							=item 'NTSEQ'
90
91							$seq-Eseq
92
93							=back
94
95							=head1 FEEDBACK
96
97							=head2 Mailing Lists
98
99							User feedback is an integral part of the evolution of this and other
100							Bioperl modules. Send your comments and suggestions preferably to one
101							of the Bioperl mailing lists. Your participation is much appreciated.
102
103							bioperl-l@bioperl.org - General discussion
104							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
105
106							=head2 Support
107
108							Please direct usage questions or support issues to the mailing list:
109
110							I
111
112							rather than to the module maintainer directly. Many experienced and
113							reponsive experts will be able look at the problem and quickly
114							address it. Please include a thorough description of the problem
115							with code and data examples if at all possible.
116
117							=head2 Reporting Bugs
118
119							Report bugs to the Bioperl bug tracking system to help us keep track
120							the bugs and their resolution. Bug reports can be submitted via the web:
121
122							https://github.com/bioperl/bioperl-live/issues
123
124							=head1 AUTHOR - Allen Day
125
126							Email allenday@ucla.edu
127
128							=head1 APPENDIX
129
130							The rest of the documentation details each of the object
131							methods. Internal methods are usually preceded with a _
132
133							=cut
134
135							# Let the code begin...
136
137							package Bio::SeqIO::kegg;
138	1			1		419	use strict;
	1					2
	1					23
139
140	1			1		276	use Bio::SeqFeature::Generic;
	1					2
	1					39
141	1			1		269	use Bio::Species;
	1					2
	1					24
142	1			1		5	use Bio::Seq::SeqFactory;
	1					1
	1					17
143	1			1		3	use Bio::Annotation::Collection;
	1					2
	1					17
144	1			1		240	use Bio::Annotation::Comment;
	1					1
	1					21
145	1			1		252	use Bio::Annotation::DBLink;
	1					2
	1					23
146
147	1			1		5	use base qw(Bio::SeqIO);
	1					2
	1					291
148
149							sub _initialize {
150	1			1		2	my($self,@args) = @_;
151
152	1					5	$self->SUPER::_initialize(@args);
153							# hash for functions for decoding keys.
154	1					3	$self->{'_func_ftunit_hash'} = {};
155	1	50				5	if( ! defined $self->sequence_factory ) {
156	1					3	$self->sequence_factory(Bio::Seq::SeqFactory->new
157							(-verbose => $self->verbose(),
158							-type => 'Bio::Seq::RichSeq'));
159							}
160							}
161
162							=head2 next_seq
163
164							Title : next_seq
165							Usage : $seq = $stream->next_seq()
166							Function: returns the next sequence in the stream
167							Returns : Bio::Seq::RichSeq object
168							Args :
169
170							=cut
171
172							sub next_seq {
173	1			1	1	486	my ($self,@args) = @_;
174	1					4	my $builder = $self->sequence_builder();
175	1					5	my $seq;
176							my %params;
177
178	1					0	my $buffer;
179	1					0	my (@acc, @features);
180	1					0	my ($display_id, $annotation);
181	1					0	my $species;
182
183							# initialize; we may come here because of starting over
184	1					2	@features = ();
185	1					2	$annotation = undef;
186	1					2	@acc = ();
187	1					1	$species = undef;
188	1					3	%params = (-verbose => $self->verbose); # reset hash
189	1					4	local($/) = "///\n";
190
191	1					7	$buffer = $self->_readline();
192
193	1	50				3	return if( !defined $buffer ); # end of file
194	1	50				5	$buffer =~ /^ENTRY/ \|\|
195							$self->throw("KEGG stream with bad ENTRY line. Not KEGG in my book. Got $buffer'");
196
197	1					1	my %FIELDS;
198	1					16	my @chunks = split /\n(?=\S)/, $buffer;
199
200	1					3	foreach my $chunk (@chunks){
201	11					19	my($key) = $chunk =~ /^(\S+)/;
202	11					21	$FIELDS{$key} = $chunk;
203							}
204
205							# changing to split method to get entry_ids that include
206							# sequence version like Whatever.1
207							my(undef,$entry_id,$entry_seqtype,$entry_species) =
208	1					3	split(' ',$FIELDS{ENTRY});
209
210	1					2	my($name);
211	1	50				3	if ($FIELDS{NAME}) {
212	1					4	($name) = $FIELDS{NAME} =~ /^NAME\s+(.+)$/;
213							}
214
215	1					2	my( $definition, $aa_length, $aa_seq, $nt_length, $nt_seq );
216
217	1	50	33			11	if(( exists $FIELDS{DEFINITION} ) and ( $FIELDS{DEFINITION} =~ /^DEFINITION/ )) {
218	1					4	($definition) = $FIELDS{DEFINITION} =~ /^DEFINITION\s+(.+)$/s;
219	1					6	$definition =~ s/\s+/ /gs;
220							}
221	1	50	33			6	if(( exists $FIELDS{AASEQ} ) and ( $FIELDS{AASEQ} =~ /^AASEQ/ )) {
222	1					5	($aa_length,$aa_seq) = $FIELDS{AASEQ} =~ /^AASEQ\s+(\d+)\n(.+)$/s;
223	1					8	$aa_seq =~ s/\s+//g;
224							}
225	1	50	33			6	if(( exists $FIELDS{NTSEQ} ) and ( $FIELDS{NTSEQ} =~ /^NTSEQ/ )) {
226	1					4	($nt_length,$nt_seq) = $FIELDS{NTSEQ} =~ /^NTSEQ\s+(\d+)\n(.+)$/s;
227	1					16	$nt_seq =~ s/\s+//g;
228							}
229
230	1					7	$annotation = Bio::Annotation::Collection->new();
231
232	1					6	$annotation->add_Annotation('description',
233							Bio::Annotation::Comment->new(-text => $definition));
234
235	1					3	$annotation->add_Annotation('aa_seq',
236							Bio::Annotation::Comment->new(-text => $aa_seq));
237
238	1					2	my($ortholog_db,$ortholog_id,$ortholog_desc);
239	1	50				2	if ($FIELDS{ORTHOLOG}) {
240							($ortholog_db,$ortholog_id,$ortholog_desc) = $FIELDS{ORTHOLOG}
241	1					7	=~ /^ORTHOLOG\s+(\S+):\s+(\S+)\s+(.*?)$/;
242
243	1					8	$annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
244							-database => $ortholog_db,
245							-primary_id => $ortholog_id,
246							-comment => $ortholog_desc) );
247							}
248
249	1	50				3	if($FIELDS{MOTIF}){
250	0					0	$FIELDS{MOTIF} =~ s/^MOTIF\s+//;
251	0					0	while($FIELDS{MOTIF} =~/\s*?(\S+):\s+(.+?)$/mg){
252	0					0	my $db = $1;
253	0					0	my $ids = $2;
254	0					0	foreach my $id (split(/\s+/, $ids)){
255
256	0					0	$annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
257							-database =>$db,
258							-primary_id => $id,
259							-comment => "") );
260							}
261							}
262							}
263
264	1	50				2	if($FIELDS{PATHWAY}) {
265	0					0	$FIELDS{PATHWAY} =~ s/^PATHWAY\s+//;
266	0					0	while($FIELDS{PATHWAY} =~ /\s*PATH:\s+(.+)$/mg){
267	0					0	$annotation->add_Annotation('pathway',
268							Bio::Annotation::Comment->new(-text => "$1"));
269							}
270							}
271
272	1	50				2	if($FIELDS{POSITION}) {
273	1					5	$FIELDS{POSITION} =~ s/^POSITION\s+//;
274							$annotation->add_Annotation('position',
275	1					4	Bio::Annotation::Comment->new(-text => $FIELDS{POSITION}));
276							}
277
278	1	50				3	if ($FIELDS{CLASS}) {
279	1					4	$FIELDS{CLASS} =~ s/^CLASS\s+//;
280	1					5	$FIELDS{'CLASS'} =~ s/\n//g;
281	1					6	while($FIELDS{CLASS} =~ /(.*?)\[(\S+):(\S+)\]/g){
282	2					8	my ($pathway,$db,$id) = ($1,$2,$3);
283	2					11	$pathway =~ s/\s+/ /g;
284	2					6	$pathway =~ s/\s$//g;
285	2					4	$pathway =~ s/^\s+//;
286	2					5	$annotation->add_Annotation('pathway',
287							Bio::Annotation::Comment->new(-text => $pathway));
288
289	2					7	$annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
290							-database => $db, -primary_id => $id));
291							}
292							}
293
294	1	50				3	if($FIELDS{DBLINKS}) {
295	1					4	$FIELDS{DBLINKS} =~ s/^DBLINKS/ /;
296	1					7	while($FIELDS{DBLINKS} =~ /\s+(\S+):\s+(\S+)\n?/gs){ ### modified
297	4	50				15	$annotation->add_Annotation('dblink',Bio::Annotation::DBLink->new(
298							-database => $1, -primary_id => $2)) if $1;
299							}
300							}
301
302	1					2	$params{'-alphabet'} = 'dna';
303	1					2	$params{'-seq'} = $nt_seq;
304	1					2	$params{'-display_id'} = $name;
305	1					2	$params{'-accession_number'} = $entry_id;
306	1					6	$params{'-species'} = Bio::Species->new(
307							-common_name => $entry_species);
308	1					2	$params{'-annotation'} = $annotation;
309
310	1					6	$builder->add_slot_value(%params);
311	1					4	$seq = $builder->make_object();
312
313	1					8	return $seq;
314							}
315
316							=head2 write_seq
317
318							Title : write_seq
319							Note : write_seq() is not implemented for KEGG format output.
320
321							=cut
322
323							sub write_seq {
324	0			0	1		shift->throw("write_seq() not implemented for KEGG format output.");
325							}
326
327							1;