File Coverage

blib/lib/Bio/Protease.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Bio::Protease;
2             {
3             $Bio::Protease::VERSION = '1.112980';
4             }
5              
6             # ABSTRACT: Digest your protein substrates with customizable specificity
7              
8 3     3   71076 use Moose 1.23;
  0            
  0            
9             use MooseX::ClassAttribute;
10             use Bio::Protease::Types qw(ProteaseRegex ProteaseName);
11             use namespace::autoclean;
12              
13             with qw(
14             Bio::ProteaseI
15             Bio::Protease::Role::Specificity::Regex
16             Bio::Protease::Role::WithCache
17             );
18              
19             has '+regex' => ( init_arg => 'specificity' );
20              
21             has specificity => (
22             is => 'ro',
23             isa => ProteaseName,
24             required => 1,
25             coerce => 1
26             );
27              
28             class_has Specificities => (
29             is => 'ro',
30             lazy_build => 1,
31             );
32              
33             sub _build_Specificities {
34              
35             my %specificity_of = (
36             'alcalase' => [ '.{3}[MYFLIVW].{4}'],
37             'arg-c_proteinase' => [ '.{3}R.{4}' ],
38             'asp-n_endopeptidase' => [ '.{4}D.{3}' ],
39             'asp-n_endopeptidase_glu' => [ '.{4}[DE].{3}' ],
40             'bnps_skatole' => [ '.{3}W.{4}' ],
41             'caspase_1' => [ '[FWYL].[HAT]D[^PEDQKR].{3}' ],
42             'caspase_2' => [ 'DVAD[^PEDQKR].{3}' ],
43             'caspase_3' => [ 'DMQD[^PEDQKR].{3}' ],
44             'caspase_4' => [ 'LEVD[^PEDQKR].{3}' ],
45             'caspase_5' => [ '[LW]EHD.{4}' ],
46             'caspase_6' => [ 'VE[HI]D[^PEDQKR].{3}' ],
47             'caspase_7' => [ 'DEVD[^PEDQKR].{3}' ],
48             'caspase_8' => [ '[IL]ETD[^PEDQKR].{3}' ],
49             'caspase_9' => [ 'LEHD.{4}' ],
50             'caspase_10' => [ 'IEAD.{4}' ],
51             'chymotrypsin' => [ '.{3}[FY][^P].{3}|.{3}W[^MP].{3}' ],
52             'chymotrypsin_low' => [ '.{3}[FLY][^P].{3}|.{3}W[^MP].{3}|.{3}M[^PY].{3}|.{3}H[^DMPW].{3}' ],
53             'clostripain' => [ '.{3}R.{4}' ],
54             'cnbr' => [ '.{3}M.{4}' ],
55             'enterokinase' => [ '[DN][DN][DN]K.{4}' ],
56             'factor_xa' => [ '[AFGILTVM][DE]GR.{4}' ],
57             'formic_acid' => [ '.{3}D.{4}' ],
58             'glutamyl_endopeptidase' => [ '.{3}E.{4}' ],
59             'granzymeb' => [ 'IEPD.{4}' ],
60             'hydroxylamine' => [ '.{3}NG.{3}' ],
61             'hcl' => [ '.{8}' ],
62             'iodosobenzoic_acid' => [ '.{3}W.{4}' ],
63             'lysc' => [ '.{3}K.{4}' ],
64             'lysn' => [ '.{4}K.{3}' ],
65             'ntcb' => [ '.{4}C.{3}' ],
66             'pepsin_ph1.3' => [ '.[^HKR][^P][^R][FLWY][^P].{2}|.[^HKR][^P][FLWY].[^P].{2}' ],
67             'pepsin' => [ '.[^HKR][^P][^R][FL][^P].{2}|.[^HKR][^P][FL].[^P].{2}' ],
68             'proline_endopeptidase' => [ '.{2}[HKR]P[^P].{3}' ],
69             'proteinase_k' => [ '.{3}[AFILTVWY].{4}' ],
70             'staphylococcal_peptidase_i' => [ '.{2}[^E]E.{4}' ],
71             'thermolysin' => [ '.{3}[^XDE][AFILMV][^P].{2}' ],
72             'thrombin' => [ '.{2}GRG.{3}|[AFGILTVM][AFGILTVWA]PR[^DE][^DE].{2}' ],
73             'trypsin' => [ '.{2}(?!CKD).{6}', '.{2}(?!DKD).{6}', '.{2}(?!CKH).{6}', '.{2}(?!CKY).{6}', '.{2}(?!RRH).{6}', '.{2}(?!RRR).{6}', '.{2}(?!CRK).{6}',
74             '.{3}[KR][^P].{3}|.{2}WKP.{3}|.{2}MRP.{3}' ]
75             );
76              
77             return \%specificity_of;
78             }
79              
80             __PACKAGE__->meta->make_immutable;
81              
82              
83              
84              
85              
86              
87              
88              
89             __END__
90             =pod
91              
92             =head1 NAME
93              
94             Bio::Protease - Digest your protein substrates with customizable specificity
95              
96             =head1 VERSION
97              
98             version 1.112980
99              
100             =head1 SYNOPSIS
101              
102             use Bio::Protease;
103             my $protease = Bio::Protease->new(specificity => 'trypsin');
104              
105             my $protein = 'MRAERVIKP';
106              
107             # Perform a full digestion
108             my @products = $protease->digest($protein);
109              
110             # products: ( 'MR', 'AER', 'VIKP' )
111              
112             # Get all the scissile bonds.
113             my @sites = $protease->cleavage_sites($protein);
114              
115             # sites: ( 2, 5 )
116              
117             # Try to cut at a specific position.
118              
119             @products = $protease->cut($protein, 2);
120              
121             # products: ( 'MR', 'AERVIKP' )
122              
123             =head1 DESCRIPTION
124              
125             This module models the hydrolytic behaviour of a proteolytic enzyme.
126             Its main purpose is to predict the outcome of hydrolytic cleavage of a
127             peptidic substrate.
128              
129             The enzyme specificity is currently modeled for 37 enzymes/reagents.
130             These models are somewhat simplistic as they are largely regex-based, and
131             do not take into account subtleties such as kinetic/temperature effects,
132             accessible solvent area, secondary or tertiary structure elements.
133             However, the module is flexible enough to allow the inclusion of any of
134             these effects by consuming the module's interface, L<Bio::ProteaseI>.
135             Alternatively, if your desired specificity can be correctly described by
136             a regular expression, you can pass it to the specificity attribute at
137             construction time. See L</specificity> below.
138              
139             =head1 ATTRIBUTES
140              
141             =head2 specificity
142              
143             Set the enzyme's specificity. Required. Could be either of:
144              
145             =over 4
146              
147             =item * an enzyme name: e.g. 'enterokinase'
148              
149             my $enzyme = Bio::Protease->new(specificity => 'enterokinase');
150              
151             There are currently definitions for 37 enzymes/reagents. See
152             L</Specificities>.
153              
154             =item * a regular expression:
155              
156             my $motif = qr/MN[ED]K[^P].{3}/;
157              
158             my $enzyme = Bio::Protease->new( specificity => $motif );
159              
160             The motif should always describe an 8-character long peptide. When an
161             octapeptide matches the regex, its 4th peptidic bond (ie, between the
162             4th and 5th letter) will be marked for cleaving or reporting.
163              
164             For example, the peptide AMQRNLAW is recognized as follows:
165              
166             .----..----.----..----. .-----.-----.-----.-----.
167             | A || M | Q || R |*| N | L | A | W |
168             |----||----|----||----|^|-----|-----|-----|-----|
169             | P4 || P3 | P2 || P1 ||| P1' | P2' | P3' | P4' |
170             '----''----'----''----'|'-----'-----'-----'-----'
171             cleavage site
172              
173             Some specificity rules can only be described with more than one regular
174             expression (see the case for trypsin, for example). To account for those
175             cases, you can also pass an array reference of regular expressions; all
176             of which should match the given octapeptide:
177              
178             my $rule = [$rule1, $rule2, $rule3];
179              
180             my $enzyme = Bio::Protease->new( specificity => $rule );
181              
182             In the case your particular specificity rule requires an "or" clause,
183             you can use the "|" separator in a single regex.
184              
185             =back
186              
187             =head2 Specificities
188              
189             This B<class attribute> contains a hash reference with all the available
190             regexp-based specificities. The keys are the specificity names, and each
191             value is an arrayref with the regular expressions that define it.
192              
193             my @protease_pool = map {
194             Bio::Protease->new(specificity => $_)
195             }
196             keys %{Bio::Protease->Specificities};
197              
198             As a rule, all specificity names are lower case. Currently, they include:
199              
200             =over 2
201              
202             =item * alcalase
203              
204             =item * arg-c_proteinase
205              
206             =item * asp-n_endopeptidase
207              
208             =item * asp-n_endopeptidase_glu
209              
210             =item * bnps_skatole
211              
212             =item * caspase_1
213              
214             =item * caspase_2
215              
216             =item * caspase_3
217              
218             =item * caspase_4
219              
220             =item * caspase_5
221              
222             =item * caspase_6
223              
224             =item * caspase_7
225              
226             =item * caspase_8
227              
228             =item * caspase_9
229              
230             =item * caspase_10
231              
232             =item * chymotrypsin
233              
234             =item * chymotrypsin_low
235              
236             =item * clostripain
237              
238             =item * cnbr
239              
240             =item * enterokinase
241              
242             =item * factor_xa
243              
244             =item * formic_acid
245              
246             =item * glutamyl_endopeptidase
247              
248             =item * granzymeb
249              
250             =item * hydroxylamine
251              
252             =item * iodosobenzoic_acid
253              
254             =item * lysc
255              
256             =item * lysn
257              
258             =item * ntcb
259              
260             =item * pepsin_ph1.3
261              
262             =item * pepsin
263              
264             =item * proline_endopeptidase
265              
266             =item * proteinase_k
267              
268             =item * staphylococcal_peptidase_i
269              
270             =item * thermolysin
271              
272             =item * thrombin
273              
274             =item * trypsin
275              
276             =back
277              
278             For a complete description of their specificities, you can check out
279             L<http://www.expasy.ch/tools/peptidecutter/peptidecutter_enzymes.html>,
280             or look at the regular expressions of their definitions in this same
281             file.
282              
283             =head2 use_cache
284              
285             Turn caching on, trading memory for speed. Defaults to 0 (no caching).
286             Useful when any method is being called several times with the same
287             argument.
288              
289             my $p = Bio::Protease->new( specificity => 'trypsin', use_cache => 0 );
290             my $c = Bio::Protease->new( specificity => 'trypsin', use_cache => 1 );
291              
292             my $substrate = 'MAAEELRKVIKPR' x 10;
293              
294             $p->digest( $substrate ) for (1..1000); # time: 5.11s
295             $c->digest( $substrate ) for (1..1000); # time: 0.12s
296              
297             =head2 cache
298              
299             The cache object, which has to do the L<Cache::Ref::Role::API> role.
300             Uses L<Cache::Ref::LRU> by default with a cache size of 5000, but you
301             can set this to your liking at construction time:
302              
303             my $p = Bio::Protease->new(
304             use_cache => 1,
305             cache => Cache::Ref::Random->new( size => 50 ),
306             specificity => 'trypsin'
307             );
308              
309             =head1 METHODS
310              
311             =head2 digest
312              
313             Performs a complete digestion of the peptide argument, returning a list
314             with possible products. It does not do partial digests (see method
315             C<cut> for that).
316              
317             my @products = $enzyme->digest($protein);
318              
319             =head2 cut
320              
321             Attempt to cleave C<$peptide> at the C-terminal end of the C<$i>-th
322             residue (ie, at the right). If the bond is indeed cleavable (determined
323             by the enzyme's specificity), then a list with the two products of the
324             hydrolysis will be returned. Otherwise, returns false.
325              
326             my @products = $enzyme->cut($peptide, $i);
327              
328             =head2 cleavage_sites
329              
330             Returns a list of scissile bonds (bonds susceptible to being cleaved, as
331             determined by the enzyme's specificity). Bonds are numbered starting
332             from 1, from N to C-terminal. Takes a string with the protein sequence
333             as an argument:
334              
335             my @sites = $enzyme->cleavage_sites($peptide);
336              
337             =head2 is_substrate
338              
339             Returns true or false depending on whether the peptide argument is a substrate or
340             not. Essentially, it's equivalent to calling C<cleavage_sites> in boolean
341             context, but with the difference that this method short-circuits when it
342             finds its first cleavable site. Thus, it's useful for CPU-intensive
343             tasks where the only information required is whether a polypeptide is a
344             substrate of a particular enzyme or not.
345              
346             =head1 SEE ALSO
347              
348             =over
349              
350             =item * PeptideCutter
351              
352             This module's idea is largely based on Expasy's
353             PeptideCutter (L<http://www.expasy.ch/tools/peptidecutter/>). For more
354             information on the experimental evidence that supports both the
355             algorithm and the specificity definitions, check their page.
356              
357             =back
358              
359             =head1 AUTHOR
360              
361             Bruno Vecchi <vecchi.b gmail.com>
362              
363             =head1 COPYRIGHT AND LICENSE
364              
365             This software is copyright (c) 2011 by Bruno Vecchi.
366              
367             This is free software; you can redistribute it and/or modify it under
368             the same terms as the Perl 5 programming language system itself.
369              
370             =cut
371