File Coverage

blib/lib/Bio/FeatureIO/vecscreen_simple.pm
Criterion Covered Total %
statement 33 37 89.1
branch 13 18 72.2
condition n/a
subroutine 6 7 85.7
pod 2 2 100.0
total 54 64 84.3


line stmt bran cond sub pod time code
1             =pod
2              
3             =head1 NAME
4              
5             Bio::FeatureIO::vecscreen_simple - read/write features from NCBI vecscreen -f 3
6             output
7              
8             =head1 SYNOPSIS
9              
10             # read features
11             my $fin = Bio::FeatureIO->new(-file=>'vecscreen.out',
12             -format=>'vecscreen_simple');
13             my @vec_regions;
14             while (my $f = $fin->next_feature) {
15             push @vec_regions, $f;
16             }
17            
18             # write features NOT IMPLEMENTED
19              
20             =head1 DESCRIPTION
21              
22             vecscreen is a system for quickly identifying segments of a nucleic
23             acid sequence that may be of vector origin. NCBI developed vecscreen
24             to minimize the incidence and impact of vector contamination in public
25             sequence databases. GenBank Annotation Staff use vecscreen to verify
26             that sequences submitted for inclusion in the database are free from
27             contaminating vector sequence. Any sequence can be screened for vector
28             contamination using vecscreen.
29              
30             This module provides parsing for vecscreen '-f 3' output, described in
31             the vecscreen documentation as 'Text list, no alignments'
32              
33             =head1 FEEDBACK
34              
35             =head2 Mailing Lists
36              
37             User feedback is an integral part of the evolution of this and other
38             Bioperl modules. Send your comments and suggestions preferably to
39             the Bioperl mailing list. Your participation is much appreciated.
40              
41             bioperl-l@bioperl.org - General discussion
42             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
43              
44             =head2 Support
45              
46             Please direct usage questions or support issues to the mailing list:
47              
48             I<bioperl-l@bioperl.org>
49              
50             rather than to the module maintainer directly. Many experienced and
51             responsive experts will be able to look at the problem and quickly
52             address it. Please include a thorough description of the problem
53             with code and data examples if at all possible.
54              
55             =head2 Reporting Bugs
56              
57             Report bugs to the Bioperl bug tracking system to help us keep track
58             of the bugs and their resolution. Bug reports can be submitted via
59             the web:
60              
61             http://bugzilla.open-bio.org/
62              
63             =head1 AUTHOR - Robert Buels
64              
65             Email rmb32 AT cornell.edu
66              
67             =head1 CONTRIBUTORS
68              
69             Based on ptt.pm by Torsten Seemann
70              
71             =head1 APPENDIX
72              
73             The rest of the documentation details each of the object methods.
74             Internal methods are usually preceded with a _
75              
76             =cut
77              
78             # Let the code begin...
79              
80             package Bio::FeatureIO::vecscreen_simple;
81             BEGIN {
82 2     2   1561 $Bio::FeatureIO::vecscreen_simple::AUTHORITY = 'cpan:BIOPERLML';
83             }
84             $Bio::FeatureIO::vecscreen_simple::VERSION = '1.6.905';
85 2     2   17 use strict;
  2         6  
  2         69  
86 2     2   12 use base qw(Bio::FeatureIO);
  2         6  
  2         201  
87 2     2   567 use Bio::SeqFeature::Generic;
  2         98868  
  2         1122  
88              
89             =head2 _initialize
90              
91             Title : _initialize
92             Function: Reading: resets the parser state (current sequence name and match type)
93             Writing: not implemented; throws an exception
94              
95             =cut
96              
97             sub _initialize {
98 1     1   7 my($self,%arg) = @_;
99              
100 1         17 $self->SUPER::_initialize(%arg);
101              
102 1 50       429 if ($self->mode eq 'r') {
103 1         59 $self->{parse_state}->{seqname} = '';
104 1         7 $self->{parse_state}->{matchtype} = '';
105             }
106             else {
107 0         0 $self->throw('vecscreen_simple feature writing not implemented');
108             }
109             }
110              
111             =head2 next_feature
112              
113             Title : next_feature
114             Usage : $io->next_feature()
115             Function: read the next feature from the vecscreen output file
116             Args : none
117             Returns : Bio::SeqFeatureI object
118              
119             =cut
120              
121             sub next_feature {
122 15     15 1 11262 my $self = shift;
123 15 50       116 return unless $self->mode eq 'r'; # returns if can't read next_feature when we're in write mode
124              
125 15         203 while ( my $line = $self->_readline() ) {
126 42         1569 chomp $line;
127 42 100       203 if ( $line =~ /^>Vector (\S+)/ ) {
    50          
    100          
128 7         40 $self->{parse_state}{seqname} = $1;
129             } elsif ( $line =~ /^\s*WARNING/ ) {
130 0         0 $self->warn("$self->{parse_state}{seqname}: vecscreen says: $line\n");
131             } elsif ( $line =~ /\S/ ) {
132              
133             $self->{parse_state}{seqname}
134 28 50       81 or $self->throw("Unexpected line in vecscreen output '$line'");
135              
136             #if it's not a vector line, it should be either a match type or
137             #a coordinates line
138 28         67 my $lcline = lc $line;
139              
140 28 100       155 if ( $line =~ /^(\d+)\s+(\d+)\s*$/ ) {
    100          
    50          
141 14         61 my ($s,$e) = ($1,$2);
142              
143 14         70 my $matchtype = $self->{parse_state}{matchtype};
144 14         106 $matchtype =~ s/\s/_/g; #replace whitespace with underscores for the primary tag
145             return Bio::SeqFeature::Generic->new( -start => $s,
146             -end => $e,
147             -primary => $matchtype,
148             -seq_id => $self->{parse_state}{seqname},
149 14         90 );
150             } elsif ( $lcline eq 'no hits found' ) {
151 2         10 $self->{parse_state}{seqname} = '';
152             } elsif ( grep $lcline eq $_, 'strong match', 'moderate match', 'weak match', 'suspect origin') {
153 12         44 $self->{parse_state}{matchtype} = $lcline;
154             } else {
155 0         0 $self->throw("Parse error. Expected a match type or coordinate line but got '$line'");
156             }
157             } else {
158             #blank line, ignore it and reset parser
159              
160 7         17 $self->{parse_state}{seqname} = ''; #< a line with whitespace
161             #indicates a boundary
162             #between output for
163             #different sequences
164 7         21 $self->{parse_state}{matchtype} = '';
165             }
166             }
167              
168 1         48 return;
169             }
170              
171             =head2 write_feature (NOT IMPLEMENTED)
172              
173             Title : write_feature
174             Usage : $io->write_feature($feature)
175             Function: write a Bio::SeqFeatureI object in vecscreen -f 3 format
176             Example :
177             Args : Bio::SeqFeatureI object
178             Returns : nothing (always throws a not-implemented exception)
179              
180             =cut
181              
182             sub write_feature {
183 0     0 1   shift->throw_not_implemented;
184             }
185              
186              
187             ###
188             1;#do not remove
189             ###