File Coverage

blib/lib/Bio/Protease.pm

Criterion	Covered	Total	%
statement	1	3	33.3
branch			n/a
condition			n/a
subroutine	1	1	100.0
pod			n/a
total	2	4	50.0

line	stmt	sub	time	code
1				package Bio::Protease;
2				{
3				$Bio::Protease::VERSION = '1.112980';
4				}
5
6				# ABSTRACT: Digest your protein substrates with customizable specificity
7
8	3	3	71076	use Moose 1.23;
	0
	0
9				use MooseX::ClassAttribute;
10				use Bio::Protease::Types qw(ProteaseRegex ProteaseName);
11				use namespace::autoclean;
12				# Consume the Bio::ProteaseI interface role together with the Regex specificity and WithCache roles.
13				with qw(
14				Bio::ProteaseI
15				Bio::Protease::Role::Specificity::Regex
16				Bio::Protease::Role::WithCache
17				);
18				# Re-declare the existing 'regex' attribute (presumably supplied by a consumed role) so the constructor fills it from the 'specificity' argument.
19				has '+regex' => ( init_arg => 'specificity' );
20				# Read-only, required attribute constrained to ProteaseName, with type coercion enabled.
21				has specificity => (
22				is => 'ro',
23				isa => ProteaseName,
24				required => 1,
25				coerce => 1
26				);
27				# Class-level read-only attribute; lazy_build defers population to _build_Specificities on first access.
28				class_has Specificities => (
29				is => 'ro',
30				lazy_build => 1,
31				);
32
33				sub _build_Specificities {
34				# Builder for the Specificities class attribute: maps each specificity name to an arrayref of 8-residue pattern strings. Within one pattern '|' separates alternatives; it must stay unescaped, since '\|' would match a literal pipe.
35				my %specificity_of = (
36				'alcalase' => [ '.{3}[MYFLIVW].{4}'],
37				'arg-c_proteinase' => [ '.{3}R.{4}' ],
38				'asp-n_endopeptidase' => [ '.{4}D.{3}' ],
39				'asp-n_endopeptidase_glu' => [ '.{4}[DE].{3}' ],
40				'bnps_skatole' => [ '.{3}W.{4}' ],
41				'caspase_1' => [ '[FWYL].[HAT]D[^PEDQKR].{3}' ],
42				'caspase_2' => [ 'DVAD[^PEDQKR].{3}' ],
43				'caspase_3' => [ 'DMQD[^PEDQKR].{3}' ],
44				'caspase_4' => [ 'LEVD[^PEDQKR].{3}' ],
45				'caspase_5' => [ '[LW]EHD.{4}' ],
46				'caspase_6' => [ 'VE[HI]D[^PEDQKR].{3}' ],
47				'caspase_7' => [ 'DEVD[^PEDQKR].{3}' ],
48				'caspase_8' => [ '[IL]ETD[^PEDQKR].{3}' ],
49				'caspase_9' => [ 'LEHD.{4}' ],
50				'caspase_10' => [ 'IEAD.{4}' ],
51				'chymotrypsin' => [ '.{3}[FY][^P].{3}|.{3}W[^MP].{3}' ],
52				'chymotrypsin_low' => [ '.{3}[FLY][^P].{3}|.{3}W[^MP].{3}|.{3}M[^PY].{3}|.{3}H[^DMPW].{3}' ],
53				'clostripain' => [ '.{3}R.{4}' ],
54				'cnbr' => [ '.{3}M.{4}' ],
55				'enterokinase' => [ '[DN][DN][DN]K.{4}' ],
56				'factor_xa' => [ '[AFGILTVM][DE]GR.{4}' ],
57				'formic_acid' => [ '.{3}D.{4}' ],
58				'glutamyl_endopeptidase' => [ '.{3}E.{4}' ],
59				'granzymeb' => [ 'IEPD.{4}' ],
60				'hydroxylamine' => [ '.{3}NG.{3}' ],
61				'hcl' => [ '.{8}' ],
62				'iodosobenzoic_acid' => [ '.{3}W.{4}' ],
63				'lysc' => [ '.{3}K.{4}' ],
64				'lysn' => [ '.{4}K.{3}' ],
65				'ntcb' => [ '.{4}C.{3}' ],
66				'pepsin_ph1.3' => [ '.[^HKR][^P][^R][FLWY][^P].{2}|.[^HKR][^P][FLWY].[^P].{2}' ],
67				'pepsin' => [ '.[^HKR][^P][^R][FL][^P].{2}|.[^HKR][^P][FL].[^P].{2}' ],
68				'proline_endopeptidase' => [ '.{2}[HKR]P[^P].{3}' ],
69				'proteinase_k' => [ '.{3}[AFILTVWY].{4}' ],
70				'staphylococcal_peptidase_i' => [ '.{2}[^E]E.{4}' ],
71				'thermolysin' => [ '.{3}[^XDE][AFILMV][^P].{2}' ],
72				'thrombin' => [ '.{2}GRG.{3}|[AFGILTVM][AFGILTVWA]PR[^DE][^DE].{2}' ],
73				'trypsin' => [ '.{2}(?!CKD).{6}', '.{2}(?!DKD).{6}', '.{2}(?!CKH).{6}', '.{2}(?!CKY).{6}', '.{2}(?!RRH).{6}', '.{2}(?!RRR).{6}', '.{2}(?!CRK).{6}',
74				'.{3}[KR][^P].{3}|.{2}WKP.{3}|.{2}MRP.{3}' ]
75				);
76				# Return a reference to the table (name => arrayref of patterns), not a flattened list.
77				return \%specificity_of;
78				}
79				# Finalize the class definition by making its metaclass immutable.
80				__PACKAGE__->meta->make_immutable;
81
82
83
84
85
86
87
88
89				__END__
90				=pod
91
92				=head1 NAME
93
94				Bio::Protease - Digest your protein substrates with customizable specificity
95
96				=head1 VERSION
97
98				version 1.112980
99
100				=head1 SYNOPSIS
101
102				use Bio::Protease;
103				my $protease = Bio::Protease->new(specificity => 'trypsin');
104
105				my $protein = 'MRAERVIKP';
106
107				# Perform a full digestion
108				my @products = $protease->digest($protein);
109
110				# products: ( 'MR', 'AER', 'VIKP' )
111
112				# Get all the scissile bonds.
113				my @sites = $protease->cleavage_sites($protein);
114
115				# sites: ( 2, 5 )
116
117				# Try to cut at a specific position.
118
119				@products = $protease->cut($protein, 2);
120
121				# products: ( 'MR', 'AERVIKP' )
122
123				=head1 DESCRIPTION
124
125				This module models the hydrolytic behaviour of a proteolytic enzyme.
126				Its main purpose is to predict the outcome of hydrolytic cleavage of a
127				peptidic substrate.
128
129				The enzyme specificity is currently modeled for 38 enzymes/reagents.
130				These models are somewhat simplistic as they are largely regex-based, and
131				do not take into account subtleties such as kinetic/temperature effects,
132				accessible solvent area, secondary or tertiary structure elements.
133				However, the module is flexible enough to allow the inclusion of any of
134				these effects by consuming the module's interface, L<Bio::ProteaseI>.
135				Alternatively, if your desired specificity can be correctly described by
136				a regular expression, you can pass it to the specificity attribute at
137				construction time. See L</specificity> below.
138
139				=head1 ATTRIBUTES
140
141				=head2 specificity
142
143				Set the enzyme's specificity. Required. Could be either of:
144
145				=over 4
146
147				=item * an enzyme name: e.g. 'enterokinase'
148
149				my $enzyme = Bio::Protease->new(specificity => 'enterokinase');
150
151				There are currently definitions for 38 enzymes/reagents. See
152				L</Specificities>.
153
154				=item * a regular expression:
155
156				my $motif = qr/MN[ED]K[^P].{3}/;
157
158				my $enzyme = Bio::Protease->new( specificity => $motif );
159
160				The motif should always describe an 8-character long peptide. When an
161				octapeptide matches the regex, its 4th peptidic bond (ie, between the
162				4th and 5th letter) will be marked for cleaving or reporting.
163
164				For example, the peptide AMQRNLAW is recognized as follows:
165
166				.----..----.----..----. .-----.-----.-----.-----.
167				| A  || M  | Q  || R  |*| N   | L   | A   | W   |
168				|----||----|----||----|^|-----|-----|-----|-----|
169				| P4 || P3 | P2 || P1 ||| P1' | P2' | P3' | P4' |
170				'----''----'----''----'|'-----'-----'-----'-----'
171				                 cleavage site
172
173				Some specificity rules can only be described with more than one regular
174				expression (see the case for trypsin, for example). To account for those
175				cases, you can also pass an array reference of regular expressions; all
176				of which should match the given octapeptide:
177
178				my $rule = [$rule1, $rule2, $rule3];
179
180				my $enzyme = Bio::Protease->new( specificity => $rule );
181
182				In the case your particular specificity rule requires an "or" clause,
183				you can use the "|" separator in a single regex.
184
185				=back
186
187				=head2 Specificities
188
189				This B<class attribute> contains a hash reference with all the available
190				regex-based specificities. The keys are the specificity names, the
191				value is an arrayref with the regular expressions that define them.
192
193				my @protease_pool = map {
194				Bio::Protease->new(specificity => $_)
195				}
196				keys %{Bio::Protease->Specificities};
197
198				As a rule, all specificity names are lower case. Currently, they include:
199
200				=over 2
201
202				=item * alcalase
203
204				=item * arg-c_proteinase
205
206				=item * asp-n_endopeptidase
207
208				=item * asp-n_endopeptidase_glu
209
210				=item * bnps_skatole
211
212				=item * caspase_1
213
214				=item * caspase_2
215
216				=item * caspase_3
217
218				=item * caspase_4
219
220				=item * caspase_5
221
222				=item * caspase_6
223
224				=item * caspase_7
225
226				=item * caspase_8
227
228				=item * caspase_9
229
230				=item * caspase_10
231
232				=item * chymotrypsin
233
234				=item * chymotrypsin_low
235
236				=item * clostripain
237
238				=item * cnbr
239
240				=item * enterokinase
241
242				=item * factor_xa
243
244				=item * formic_acid
245
246				=item * glutamyl_endopeptidase
247
248				=item * granzymeb
249
250				=item * hydroxylamine
251
252				=item * iodosobenzoic_acid
253
254				=item * lysc
255
256				=item * lysn
257
258				=item * ntcb
259
260				=item * pepsin_ph1.3
261
262				=item * pepsin
263
264				=item * proline_endopeptidase
265
266				=item * proteinase_k
267
268				=item * staphylococcal_peptidase_i
269
270				=item * thermolysin
271
272				=item * thrombin
273
274				=item * trypsin
275
276				=back
277
278				For a complete description of their specificities, you can check out
279				L<http://www.expasy.ch/tools/peptidecutter/peptidecutter_enzymes.html>,
280				or look at the regular expressions of their definitions in this same
281				file.
282
283				=head2 use_cache
284
285				Turn caching on, trading memory for speed. Defaults to 0 (no caching).
286				Useful when any method is being called several times with the same
287				argument.
288
289				my $p = Bio::Protease->new( specificity => 'trypsin', use_cache => 0 );
290				my $c = Bio::Protease->new( specificity => 'trypsin', use_cache => 1 );
291
292				my $substrate = 'MAAEELRKVIKPR' x 10;
293
294				$p->digest( $substrate ) for (1..1000); # time: 5.11s
295				$c->digest( $substrate ) for (1..1000); # time: 0.12s
296
297				=head2 cache
298
299				The cache object, which has to do the L<Cache::Ref::Role::API> role.
300				Uses L<Cache::Ref::LRU> by default with a cache size of 5000, but you
301				can set this to your liking at construction time:
302
303				my $p = Bio::Protease->new(
304				use_cache => 1,
305				cache => Cache::Ref::Random->new( size => 50 ),
306				specificity => 'trypsin'
307				);
308
309				=head1 METHODS
310
311				=head2 digest
312
313				Performs a complete digestion of the peptide argument, returning a list
314				with possible products. It does not do partial digests (see method
315				C<cut> for that).
316
317				my @products = $enzyme->digest($protein);
318
319				=head2 cut
320
321				Attempt to cleave C<$peptide> at the C-terminal end of the C<$i>-th
322				residue (ie, at the right). If the bond is indeed cleavable (determined
323				by the enzyme's specificity), then a list with the two products of the
324				hydrolysis will be returned. Otherwise, returns false.
325
326				my @products = $enzyme->cut($peptide, $i);
327
328				=head2 cleavage_sites
329
330				Returns a list with scissile bonds (bonds susceptible to be cleaved as
331				determined by the enzyme's specificity). Bonds are numbered starting
332				from 1, from N to C-terminal. Takes a string with the protein sequence
333				as an argument:
334
335				my @sites = $enzyme->cleavage_sites($peptide);
336
337				=head2 is_substrate
338
339				Returns true or false depending on whether the peptide argument is a
340				substrate or not. Essentially, it's equivalent to calling C<cleavage_sites> in boolean
341				context, but with the difference that this method short-circuits when it
342				finds its first cleavable site. Thus, it's useful for CPU-intensive
343				tasks where the only information required is whether a polypeptide is a
344				substrate of a particular enzyme or not.
345
346				=head1 SEE ALSO
347
348				=over
349
350				=item * PeptideCutter
351
352				This module's idea is largely based on Expasy's
353				PeptideCutter (L<http://www.expasy.ch/tools/peptidecutter/>). For more
354				information on the experimental evidence that supports both the
355				algorithm and the specificity definitions, check their page.
356
357				=back
358
359				=head1 AUTHOR
360
361				Bruno Vecchi <vecchi.b gmail.com>
362
363				=head1 COPYRIGHT AND LICENSE
364
365				This software is copyright (c) 2011 by Bruno Vecchi.
366
367				This is free software; you can redistribute it and/or modify it under
368				the same terms as the Perl 5 programming language system itself.
369
370				=cut
371