File Coverage

Bio/Tools/Signalp.pm
Criterion Covered Total %
statement 61 64 95.3
branch 19 22 86.3
condition 2 3 66.6
subroutine 9 9 100.0
pod 2 2 100.0
total 93 100 93.0


line stmt bran cond sub pod time code
1             # Parser module for Signalp Bio::Tools::Signalp
2             #
3             #
4             # Based on the EnsEMBL module
5             # Bio::EnsEMBL::Pipeline::Runnable::Protein::Signalp originally
6             # written by Marc Sohrmann (ms2@sanger.ac.uk) Written in BioPipe by
7             # Please direct questions and support issues to <bioperl-l@bioperl.org>
8             #
9             # Balamurugan Kumarasamy Cared for by the Fugu
10             # Informatics team (fuguteam@fugu-sg.org)
11              
12             # You may distribute this module under the same terms as perl itself
13             #
14             # POD documentation - main docs before the code
15              
16             =head1 NAME
17              
18             Bio::Tools::Signalp - parser for Signalp output
19              
20             =head1 SYNOPSIS
21              
22             use Bio::Tools::Signalp;
23              
24             my $parser = Bio::Tools::Signalp->new(-fh =>$filehandle );
25              
26             while( my $sp_feat = $parser->next_result ) {
27             if ($sp_feat->score > 0.9) {
28             push @likely_sigpep, $sp_feat;
29             }
30             }
31              
32             =head1 DESCRIPTION
33              
34             C<SignalP> predicts the presence and location of signal peptide
35             cleavage sites in amino acid sequences.
36              
37             L<Bio::Tools::Signalp> parses the output of C<SignalP> to provide a
38             L<Bio::SeqFeature::Generic> object describing the signal peptide
39             found, if any. It returns a variety of tags extracted from the NN and HMM
40             analysis. Most importantly, the C<score()> attribute contains the
41             signal peptide probability (also stored in the C<peptideProb> tag).
42              
43              
44             =head1 FEEDBACK
45              
46             =head2 Mailing Lists
47              
48             User feedback is an integral part of the evolution of this and other
49             Bioperl modules. Send your comments and suggestions preferably to
50             the Bioperl mailing list. Your participation is much appreciated.
51              
52             bioperl-l@bioperl.org - General discussion
53             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54              
55             =head2 Support
56              
57             Please direct usage questions or support issues to the mailing list:
58              
59             I<bioperl-l@bioperl.org>
60              
61             rather than to the module maintainer directly. Many experienced and
62             responsive experts will be able to look at the problem and quickly
63             address it. Please include a thorough description of the problem
64             with code and data examples if at all possible.
65              
66             =head2 Reporting Bugs
67              
68             Report bugs to the Bioperl bug tracking system to help us keep track
69             of the bugs and their resolution. Bug reports can be submitted via the
70             web:
71              
72             https://github.com/bioperl/bioperl-live/issues
73              
74             =head1 AUTHOR
75              
76             # Please direct questions and support issues to I<bioperl-l@bioperl.org>
77              
78             Based on the EnsEMBL module Bio::EnsEMBL::Pipeline::Runnable::Protein::Signalp
79             originally written by Marc Sohrmann (ms2_AT_sanger.ac.uk). Written in BioPipe by
80             Balamurugan Kumarasamy savikalpa_AT_fugu-sg.org. Cared for by the Fugu
81             Informatics team (fuguteam_AT_fugu-sg.org)
82              
83             =head1 CONTRIBUTORS
84              
85             Torsten Seemann - torsten.seemann AT infotech.monash.edu.au
86              
87             =head1 APPENDIX
88              
89             The rest of the documentation details each of the object methods.
90             Internal methods are usually preceded with a _
91              
92             =cut
93              
94             package Bio::Tools::Signalp;
95 2     2   435 use strict;
  2         2  
  2         47  
96              
97 2     2   273 use Bio::SeqFeature::Generic;
  2         4  
  2         45  
98 2     2   8 use base qw(Bio::Root::Root Bio::Root::IO);
  2         3  
  2         1197  
99              
100              
101              
=head2 new

 Title   : new
 Usage   : my $obj = Bio::Tools::Signalp->new(-fh => $filehandle);
 Function: Builds a new Bio::Tools::Signalp object and hands the same
           argument list to the IO layer so the input source is set up
 Returns : Bio::Tools::Signalp
 Args    : -fh/-file => $val, # for initing input, see Bio::Root::IO

=cut

sub new {
    my $class   = shift;
    my @io_args = @_;

    # The parent constructor and the IO initialiser both receive the
    # complete, unmodified argument list.
    my $parser = $class->SUPER::new(@io_args);
    $parser->_initialize_io(@io_args);

    return $parser;
}
120              
=head2 next_result

 Title   : next_result
 Usage   : my $feat = $signalp->next_result
 Function: Get the next result set from parser data.  Lines are read until
           a record is found whose "max. Y" and "mean S" rows are both
           flagged YES; the cleavage-site line that follows gives the end
           coordinate of the signal peptide feature.
 Returns : Bio::SeqFeature::Generic, or nothing (empty list / undef) once
           the input holds no further positive prediction
 Args    : none
 Throws  : "parsing problem in signalp" when a positive record is not
           followed by a "Most likely cleavage site" line (including the
           case where the input ends right after the "mean S" row)

=cut

sub next_result {
    my ($self) = @_;

    while (my $line = $self->_readline()) {
        chomp $line;

        if ($line =~ /^\>(\S+)/) {
            # Record header: remember the sequence name for the feature.
            $self->_seqname($1);
        }
        elsif ($line =~ /max\.\s+Y\s+(\S+)\s+\S+\s+\S+\s+(\S+)/) {
            # Keep the YES/NO verdict of the "max. Y" row for the
            # combined decision made on the "mean S" row.
            $self->_fact1($2);
        }
        elsif ($line =~ /mean\s+S\s+(\S+)\s+\S+\s+\S+\s+(\S+)/) {
            my $mean_s_verdict = $2;
            next unless $mean_s_verdict eq 'YES';

            my $max_y_verdict = $self->_fact1;
            next unless defined $max_y_verdict and $max_y_verdict eq 'YES';

            # Look ahead for the cleavage-site line.  Newer SignalP output
            # inserts a "D" score row first, which is skipped.  The reader
            # returns undef at end of input, so every look-ahead is
            # guarded before it is chomped or matched.
            my $lookahead = $self->_readline();
            chomp $lookahead if defined $lookahead;
            if (defined $lookahead and $lookahead =~ /\s+D\s+.*/) {
                $lookahead = $self->_readline();
            }

            if (defined $lookahead
                and $lookahead =~ /Most likely cleavage site between pos\.\s+(\d+)/) {
                # Copy the capture before calling anything else.
                my $cleavage_end = $1;

                my %feature = (
                    seq_id     => scalar($self->_seqname),
                    start      => 1,
                    end        => $cleavage_end,
                    source_tag => 'Signalp',
                    primary    => 'signal_peptide',
                );

                $self->_parse_hmm_result(\%feature);
                return $self->_create_feature(\%feature);
            }

            $self->throw("parsing problem in signalp");
        }
    }

    # Input exhausted without another positive prediction.
    return;
}
184              
=head2 _parse_hmm_result

 Title   : _parse_hmm_result
 Usage   : $self->_parse_hmm_result(\%feature)
 Function: Internal (not to be used directly).  Reads further lines from
           the input and stores the values it recognises in the supplied
           hash: "Prediction:" as hmmProdiction, "Signal peptide
           probability:" as peptideProb and "Signal anchor probability:"
           as anchorProb.
 Returns : nothing meaningful; the hash passed in is modified in place
 Args    : hash reference to receive the feature values

=cut

sub _parse_hmm_result {
    my ($self, $feature_hash) = @_;

    # NOTE(review): reading only stops at a "Signal anchor probability"
    # line or when the reader runs dry; presumably output without that
    # line is consumed to the end of input -- confirm against real data.
    HMM_LINE:
    while (my $text = $self->_readline) {
        chomp $text;

        if ($text =~ /Prediction: (.+)$/) {
            $feature_hash->{hmmProdiction} = $1;
            next HMM_LINE;
        }

        if ($text =~ /Signal peptide probability: ([0-9\.]+)/) {
            $feature_hash->{peptideProb} = $1;
            next HMM_LINE;
        }

        if ($text =~ /Signal anchor probability: ([0-9\.]+)/) {
            $feature_hash->{anchorProb} = $1;
            last HMM_LINE;
        }
    }
}
209              
=head2 _create_feature

 Title   : _create_feature
 Usage   : $self->_create_feature(\%feature)
 Function: Internal (not to be used directly).  Builds the feature object
           from the collected values.  The score is set to peptideProb,
           and the tags peptideProb, anchorProb, evalue, percent_id, hid
           and SignalpPrediction are attached.
 Returns : Bio::SeqFeature::Generic
 Args    : hash reference of feature values.  The sequence id is read
           from seq_id (falling back to the older key name) and the
           source from source_tag (falling back to the older key source).

=cut

sub _create_feature {
    my ($self, $feat) = @_;

    # next_result() fills in 'seq_id' and 'source_tag'.  Reading only
    # 'name' and 'source' left both attributes undefined on the feature,
    # so the populated keys are preferred and the older names are kept as
    # a fallback for callers that still supply them.
    my $seq_id = defined $feat->{seq_id}     ? $feat->{seq_id}     : $feat->{name};
    my $source = defined $feat->{source_tag} ? $feat->{source_tag} : $feat->{source};

    # create feature object
    my $feature = Bio::SeqFeature::Generic->new(
        -seq_id     => $seq_id,
        -start      => $feat->{start},
        -end        => $feat->{end},
        -score      => $feat->{score},
        -source     => $source,
        -primary    => $feat->{primary},
        -logic_name => $feat->{logic_name},
    );

    # The signal peptide probability is the score of the feature.
    $feature->score($feat->{peptideProb});
    $feature->add_tag_value('peptideProb', $feat->{peptideProb});
    $feature->add_tag_value('anchorProb', $feat->{anchorProb});
    # 'evalue' carries the anchor probability, as it always has.
    $feature->add_tag_value('evalue', $feat->{anchorProb});
    $feature->add_tag_value('percent_id', 'NULL');
    $feature->add_tag_value("hid", $feat->{primary});
    $feature->add_tag_value('SignalpPrediction', $feat->{hmmProdiction});

    return $feature;
}
244              
=head2 _seqname

 Title   : _seqname
 Usage   : $self->_seqname($name)
 Function: Internal (not to be used directly).  Combined getter/setter
           for the sequence name kept in the object's 'seqname' slot.
 Returns : the stored sequence name (undef if none has been set)
 Args    : optional new name; an undefined argument leaves the stored
           value untouched

=cut

sub _seqname {
    my $self = shift;
    my ($new_name) = @_;

    # Only a defined argument overwrites the stored name.
    $self->{'seqname'} = $new_name if defined $new_name;

    return $self->{'seqname'};
}
263              
=head2 _fact1

 Title   : _fact1
 Usage   : $self->_fact1($fact1)
 Function: Internal (not to be used directly).  Combined getter/setter
           for the value kept in the object's 'fact1' slot; next_result
           stores the verdict column of the "max. Y" row here.
 Returns : the stored value (undef if none has been set)
 Args    : optional new value; an undefined argument leaves the stored
           value untouched

=cut

sub _fact1 {
    my $self = shift;
    my ($verdict) = @_;

    # Only a defined argument overwrites the stored value.
    $self->{'fact1'} = $verdict if defined $verdict;

    return $self->{'fact1'};
}
282              
283              
284              
285             1;
286              
287