File Coverage

blib/lib/Bio/FeatureIO/ptt.pm
Criterion Covered Total %
statement 37 40 92.5
branch 14 22 63.6
condition n/a
subroutine 8 9 88.8
pod 4 4 100.0
total 63 75 84.0


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             Bio::FeatureIO::ptt - read/write features in PTT format
6              
7             =head1 SYNOPSIS
8              
9             # read features
10             my $fin = Bio::FeatureIO->new(-file=>'genes.ptt', -format=>'ptt');
11             my @cds;
12             while (my $f = $fin->next_feature) {
13             push @cds, $f if $f->strand > 0;
14             }
15              
16             # write features (NOT IMPLEMENTED)
17             my $fout = Bio::FeatureIO->new(-fh=>\*STDOUT, -format=>'ptt');
18             for my $f (@cds) {
19             $fout->write_feature($f);
20             }
21              
22             =head1 DESCRIPTION
23              
24             The PTT file format is a table of protein features.
25             It is used mainly by NCBI who produce PTT files for
26             all their published genomes found in L<ftp://ftp.ncbi.nih.gov/genomes/>.
27             It has the following format:
28              
29             =over 4
30              
31             =item Line 1
32              
33             Description of sequence to which the features belong
34             eg. "Leptospira interrogans chromosome II, complete sequence - 0..358943"
35              
36             It is usually equivalent to the DEFINITION line of a Genbank file,
37             with the length of the sequence appended. It is unclear why "0" is
38             used as a starting range, it should be "1".
39              
40             =item Line 2
41              
42             Number of feature lines in the table
43             eg. "367 proteins"
44              
45             =item Line 3
46              
47             Column headers, tab separated
48             eg. "Location Strand Length PID Gene Synonym Code COG Product"
49              
50             Location : "begin..end" span of feature
51             Strand : "+" or "-"
52             Length : number of amino acids excluding the stop codon
53             PID : analogous to Genbank /db_xref="GI:xxxxxxxxx"
54             Gene : analogous to Genbank /gene="xxxx"
55             Synonym : analogous to Genbank /locus_tag="xxxx"
56             Code : seventh column; meaning not documented here ("-" in the example below)
57             COG : CDD COG code with COG letter categories appended
58             Product : analogous to Genbank /product="xxxx"
59              
60             =item Line 4 onwards
61              
62             Feature lines, nine columns, tab separated, "-" used for empty fields
63             eg. "2491..3423 + 310 24217063 metF LB002 - COG0685E 5,10-methylenetetrahydrofolate reductase"
64              
65              
66             =back
67              
68             =head1 FEEDBACK
69              
70             =head2 Mailing Lists
71              
72             User feedback is an integral part of the evolution of this and other
73             Bioperl modules. Send your comments and suggestions preferably to
74             the Bioperl mailing list. Your participation is much appreciated.
75              
76             bioperl-l@bioperl.org - General discussion
77             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
78              
79             =head2 Support
80              
81             Please direct usage questions or support issues to the mailing list:
82              
83             I<bioperl-l@bioperl.org>
84              
85             rather than to the module maintainer directly. Many experienced and
86             responsive experts will be able to look at the problem and quickly
87             address it. Please include a thorough description of the problem
88             with code and data examples if at all possible.
89              
90             =head2 Reporting Bugs
91              
92             Report bugs to the Bioperl bug tracking system to help us keep track
93             of the bugs and their resolution. Bug reports can be submitted via
94             the web:
95              
96             http://bugzilla.open-bio.org/
97              
98             =head1 AUTHOR - Torsten Seemann
99              
100             Email torsten.seemann AT infotech.monash.edu.au
101              
102             =head1 CONTRIBUTORS
103              
104             Based on bed.pm and gff.pm by Allen Day.
105              
106             =head1 APPENDIX
107              
108             The rest of the documentation details each of the object methods.
109             Internal methods are usually preceded with a _
110              
111             =cut
112              
113              
114             # Let the code begin...
115              
116             package Bio::FeatureIO::ptt;
117             BEGIN {
118 2     2   1504 $Bio::FeatureIO::ptt::AUTHORITY = 'cpan:BIOPERLML';
119             }
120             $Bio::FeatureIO::ptt::VERSION = '1.6.905';
121 2     2   14 use strict;
  2         2  
  2         42  
122 2     2   8 use base qw(Bio::FeatureIO);
  2         4  
  2         148  
123 2     2   376 use Bio::SeqFeature::Generic;
  2         54529  
  2         942  
124              
125             # map tab-separated column number to field name
126             our %NAME_OF = (
127             0 => 'Location',
128             1 => 'Strand',
129             2 => 'Length',
130             3 => 'PID',
131             4 => 'Gene',
132             5 => 'Synonym',
133             6 => 'Code',
134             7 => 'COG',
135             8 => 'Product',
136             );
137             our $NUM_COL = 9;
138              
139             =head2 _initialize
140              
141             Title : _initialize
142             Function: In read mode, parses the three header lines of the input
143             (description, protein count, column headers); otherwise no extra setup
144              
145             =cut
146              
147             sub _initialize {
148 1     1   4 my($self,%arg) = @_;
149              
150 1         10 $self->SUPER::_initialize(%arg);
151              
152 1 50       226 if ($self->mode eq 'r') {
153             # Line 1
154 1         39 my $desc = $self->_readline();
155 1         42 chomp $desc;
156 1         15 $self->description($desc);
157             # Line 2
158 1         4 my $line = $self->_readline();
159 1 50       23 $line =~ m/^(\d+) proteins/ or $self->throw("Invalid protein count");
160 1         5 $self->protein_count($1);
161             # Line 3
162 1         2 $self->_readline();
163             }
164             }
165              
166             =head2 next_feature
167              
168             Title : next_feature
169             Usage : $io->next_feature()
170             Function: read the next feature from the PTT file
171             Example :
172             Args :
173             Returns : Bio::SeqFeatureI object
174              
175             =cut
176              
177             sub next_feature {
178 368     368 1 23022 my $self = shift;
179 368 50       786 $self->mode eq 'r' || return; # returns if can't read next_feature when we're in write mode
180            
181 368 100       2808 my $line = $self->_readline() or return; # returns if end of file, no more features?
182 367         7910 chomp $line;
183 367         1152 my @col = split m/\t/, $line;
184 367 50       736 @col==$NUM_COL or $self->throw("Too many columns for PTT line");
185              
186 367 50       1688 $col[0] =~ m/(\d+)\.\.(\d+)/ or $self->throw("Invalid location (column 1)");
187 367         1179 my $feat = Bio::SeqFeature::Generic->new(-start=>$1, -end=>$2, -primary=>'CDS');
188 367 50       66357 $col[1] =~ m/^([+-])$/ or $self->throw("Invalid strand (column 2)");
189 367 100       1744 $feat->strand($1 eq '+' ? +1 : -1);
190 367         6743 for my $i (2 .. $NUM_COL-1) {
191 2569 100       14457 $feat->add_tag_value($NAME_OF{$i}, $col[$i]) if $col[$i] ne '-';
192             }
193 367         3286 return $feat;
194             }
195              
196             =head2 write_feature (NOT IMPLEMENTED)
197              
198             Title : write_feature
199             Usage : $io->write_feature($feature)
200             Function: write a Bio::SeqFeatureI object in PTT format
201             Example :
202             Args : Bio::SeqFeatureI object
203             Returns :
204              
205             =cut
206              
207             sub write_feature {
208 0     0 1 0 shift->throw_not_implemented;
209             }
210              
211             =head2 description
212              
213             Title : description
214             Usage : $obj->description($newval)
215             Function: set/get the PTT file description for/from line one
216             Example :
217             Returns : value of description (a scalar)
218             Args : on set, new value (a scalar or undef, optional)
219              
220             =cut
221              
222             sub description {
223 1     1 1 2 my $self = shift;
224 1 50       9 return $self->{'description'} = shift if @_;
225 0         0 return $self->{'description'};
226             }
227              
228             =head2 protein_count
229              
230             Title : protein_count
231             Usage : $obj->protein_count($newval)
232             Function: set/get the PTT protein count for/from line two
233             Example :
234             Args : on set, new value (a scalar or undef, optional)
235             Returns : value of protein_count (a scalar)
236              
237             =cut
238              
239             sub protein_count {
240 1     1 1 2 my $self = shift;
241 1 50       6 return $self->{'protein_count'} = shift if @_;
242 0           return $self->{'protein_count'};
243             }
244              
245             1;