File Coverage

blib/lib/Bio/GeneDesign/RestrictionEnzyme.pm
Criterion Covered Total %
statement 110 392 28.0
branch 46 192 23.9
condition 7 120 5.8
subroutine 11 55 20.0
pod 50 50 100.0
total 224 809 27.6


line stmt bran cond sub pod time code
1             #
2             # GeneDesign module for restriction enzyme handing
3             #
4              
5             =head1 NAME
6              
7             Bio::GeneDesign::RestrictionEnzyme
8              
9             =head1 VERSION
10              
11             Version 5.52
12              
13             =head1 DESCRIPTION
14              
15             GeneDesign object that represents a type II restriction enzyme
16              
17             =head1 AUTHOR
18              
19             Sarah Richardson <SMRichardson@lbl.gov>
20              
21             =cut
22              
23             package Bio::GeneDesign::RestrictionEnzyme;
24              
25 11     11   63 use Bio::GeneDesign::Basic qw(:GD);
  11         26  
  11         2458  
26 11     11   66 use Carp;
  11         26  
  11         742  
27              
28 11     11   74 use strict;
  11         23  
  11         361  
29 11     11   63 use warnings;
  11         21  
  11         379  
30              
31 11     11   62 use base qw(Bio::Root::Root);
  11         22  
  11         61657  
32              
33             our $VERSION = 5.52;
34              
35             my $IIPreg = qr/ ([A-Z]*) \^ ([A-Z]*) /x;
36             my $IIAreg = qr/\A \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x;
37             my $IIBreg = qr/\A\(([\-]*\d+) \/ ([\-]*\d+)\) \w+ \(([\-]*\d+) \/ ([\-]*\d+)\)\Z /x;
38              
39             my %RE_vendors = (
40             B => "Invitrogen", C => "Minotech", E => "Stratagene Agilent",
41             F => "Thermo Scientific Fermentas", I => "SibEnzyme", J => "Nippon Gene Co.",
42             K => "Takara", M => "Roche Applied Science", N => "New England Biolabs",
43             O => "Toyobo Technologies", Q => "Molecular Biology Resources",
44             R => "Promega", S => "Sigma Aldrich", U => "Bangalore Genei", V => "Vivantis",
45             X => "EURx", Y => "CinnaGen"
46             );
47              
48             my %methtrans = (b => "blocked", blocked => "blocked",
49             i => "inhibited", inhibited => "inhibited",
50             u => "unknown", unknown => "unknown"
51             );
52              
53             =head1 CONSTRUCTOR METHODS
54              
55             =head2 new
56              
57             You can create a new enzyme or clone an existing enzyme to create a new instance
58             of an abstract enzyme definition. To do this, provide the -enzyme flag; the
59             constructor will ignore every other argument except for -start.
60              
61             Required arguments:
62              
63             EITHER
64            
65             -enzyme : a Bio::GeneDesign::RestrictionEnzyme object to clone
66            
67             OR
68             -id : The name of the enzyme (i.e., BamHI)
69             -cutseq : The string describing the enzyme's recognition and cleavage
70             site
71              
72             Optional arguments:
73              
74             -temp : The incubation temperature for the enzyme
75             -tempin : The heat inactivation temperature for the enzyme
76             -score : A float score, usually the price of the enzyme in dollars
77             -methdam : Sensitivity to dam methylation; can take the values
78             b or blocked,
79             i or inhibited,
80             u or unknown,
81             if undefined, will take the value indifferent.
82             -methdcm : Sensitivity to dcm methylation; can take the values
83             b or blocked,
84             i or inhibited,
85             u or unknown,
86             if undefined, will take the value indifferent.
87             -methcpg : Sensitivity to cpg methylation; can take the values
88             b or blocked,
89             i or inhibited,
90             u or unknown,
91             if undefined, will take the value indifferent.
92             -vendors : a string of single letter codes that represent vendor
93             availability - no spaces. see vendor() for a list of the
94             codes.
95             -staract : Whether or not the enzyme exhibits star activity - 1 or 0.
96             -buffers : a hash reference; keys are buffer names and values are the
97             enzyme activity in that buffer. For example:
98             NEB1 => 50, NEB2 => 100, etc.
99             -start : An integer representing an offset; usually used only in
100             cloned instances, as opposed to abstract instances.
101             -exclude : An arrayref full of ids for enzymes that should be
102             considered mutually exclusive to this enzyme - see exclude()
103              
104             =cut
105              
106             sub new
107             {
108 768     768 1 5615 my ($class, @args) = @_;
109 768         3193 my $self = $class->SUPER::new(@args);
110            
111 768         31095 my ($object, $id, $cutseq, $temp, $tempin, $score, $methdam, $methdcm,
112             $methcpg, $vendors, $staract, $buffers, $start, $exclude, $aggress) =
113             $self->_rearrange([qw(ENZYME ID CUTSEQ TEMP TEMPIN SCORE METHDAM METHDCM
114             METHCPG VENDORS STARACT BUFFERS START EXCLUDE AGGRESS)], @args);
115              
116 768 50       71804 if ($object)
117             {
118 0 0       0 $self->throw("object of class " . ref($object) . " does not implement ".
119             "Bio::GeneDesign::RestrictionEnzyme.")
120             unless $object->isa("Bio::GeneDesign::RestrictionEnzyme");
121 0         0 $self = $object->clone();
122             }
123             else
124             {
125            
126 768 50       2262 $self->throw("No enzyme id defined") unless ($id);
127 768         1943 $self->{'id'} = $id;
128              
129 768 50       2032 $self->throw("No cut sequence defined") unless ($cutseq);
130 768         1610 $self->{'cutseq'} = $cutseq;
131              
132 768         1265 my $recseq = $cutseq;
133 768         7850 $recseq =~ s/\W*\d*//xg;
134 768         2054 $self->{'recseq'} = $recseq;
135              
136             #Regular expression arrayref to use for enzyme searching
137             #Should store as compiled regexes instead
138 768         2347 $self->{'regex'} = _regarr($recseq);
139              
140 768         1708 my $sitelen = length($recseq);
141 768         1611 $self->{'length'} = $sitelen;
142              
143             #Enzyme Class and Palindromy
144 768         1428 my ($lef, $rig) = (q{}, q{});
145 768 100       6101 if ($cutseq =~ $IIPreg)
    100          
    50          
146             {
147 567         1065 $lef = length($1);
148 567         855 $rig = length($2);
149 567         1082 $self->{'class'} = "IIP";
150 567         1340 $self->{'classex'} = $IIPreg;
151            
152 567 100       1064 if ($lef == $rig)
153             {
154 123         263 $self->{'palindromy'} = "unknown";
155             }
156             else
157             {
158 444         515 my $inlef = $lef;
159 444 100       1603 $inlef = length($recseq) - $inlef if ($inlef > (.5 * length($recseq)));
160 444         1544 my $mattersbit = substr($recseq, $inlef, length($recseq) - (2 * $inlef));
161 444 100 66     5648 if ($mattersbit && $mattersbit =~ $ambnt && length($mattersbit) % 2 == 0)
    100 100        
    50 66        
162             {
163 54         207 $self->{'palindromy'} = "pnon";
164             }
165             elsif ($mattersbit && $mattersbit eq _complement($mattersbit, 1))
166             {
167 384         1328 $self->{'palindromy'} = "pal";
168             }
169             elsif ($mattersbit)
170             {
171 6         28 $self->{'palindromy'} = "nonpal";
172             }
173             }
174             }
175             elsif ($cutseq =~ $IIBreg)
176             {
177 33         107 $lef = int($1);
178 33         69 $rig = int($2);
179 33         68 $self->{'class'} = "IIB";
180 33         86 $self->{'classex'} = $IIBreg;
181 33         101 $self->{'palindromy'} = "pnon";
182             }
183             elsif ($cutseq =~ $IIAreg)
184             {
185 168         360 $lef = int($1);
186 168         261 $rig = int($2);
187 168         347 $self->{'class'} = "IIA";
188 168         395 $self->{'classex'} = $IIAreg;
189 168         303 $self->{'palindromy'} = "pnon";
190             }
191             else
192             {
193 0         0 $self->{'class'} = "unknown";
194             }
195              
196             #Enzyme type
197 768         972 my $type;
198 768 100       1719 if ($lef < $rig)
    100          
    50          
199             {
200 438         574 $type .= "5'";
201 438         763 $self->{'inside_cut'} = $lef;
202 438         695 $self->{'outside_cut'} = $rig;
203             }
204             elsif ($lef > $rig)
205             {
206 195         250 $type .= "3'";
207 195         329 $self->{'inside_cut'} = $rig;
208 195         465 $self->{'outside_cut'} = $lef;
209             }
210             elsif ($lef == $rig)
211             {
212 135         202 $type .= "b";
213             }
214 768 100       1769 $self->{'onebpoverhang'} = 1 if (abs($lef - $rig) == 1);
215 768         1335 $self->{'type'} = $type;
216              
217 768 100       2490 $self->{'temp'} = $temp if ($temp);
218 768 100       1868 if ($tempin)
219             {
220 630         2261 my ($intime, $intemp) = split q{@}, $tempin;
221 630         1694 $self->{'tempin'} = $intemp;
222 630         1832 $self->{'timein'} = $intime;
223             }
224              
225 768 50       3177 $self->{'score'} = $score if ($score);
226 768         1557 $self->{'aggress'} = $aggress;
227            
228 768 100       1802 $self->{'staract'} = 1 if ($staract);
229              
230 768 100       1887 if (exists $methtrans{$methdam})
231             {
232 54         149 $self->{'methdam'} = $methtrans{$methdam};
233             }
234             else
235             {
236 714         1704 $self->{'methdam'} = "indifferent";
237             }
238              
239 768 100       1600 if (exists $methtrans{$methdcm})
240             {
241 105         247 $self->{'methdcm'} = $methtrans{$methdcm};
242             }
243             else
244             {
245 663         1200 $self->{'methdcm'} = "indifferent";
246             }
247            
248 768 100       1594 if (exists $methtrans{$methcpg})
249             {
250 390         925 $self->{'methcpg'} = $methtrans{$methcpg};
251             }
252             else
253             {
254 378         723 $self->{'methcpg'} = "indifferent";
255             }
256              
257 768 100       1770 if ($vendors)
258             {
259 765         1260 my %vhsh = ();
260 765         2730 foreach my $v (split(q{}, $vendors))
261             {
262 2991 50       9098 $vhsh{$v} = $RE_vendors{$v} if (exists $RE_vendors{$v});
263 2991 50       8451 carp("$v not in vendor list!") unless (exists $RE_vendors{$v});
264             }
265 765         2447 $self->{'vendors'} = \%vhsh;
266             }
267            
268 768 50       3615 $self->{'buffers'} = $buffers if ($buffers);
269             }
270            
271 768 50       1422 $self->{'start'} = $start if ($start);
272            
273 768 50       1348 $self->{'exclude'} = $exclude if ($exclude);
274            
275 768         5312 return $self;
276             }
277              
278             =head1 FUNCTIONAL METHODS
279              
280             =head2 clone
281              
282             By default in GeneDesign code, RestrictionEnzyme objects are meant to stand as
283             abstracts - that is, they stand for BamHI in general, and not for a particular
284             instance of a BamHI recognition site. If you want to use the objects in the
285             latter sense, you will need to clone the abstract object instantiated when the
286             definition file is read in, thus generating an arbitrary number of BamHI
287             instances that can then be differentiated by their start attributes.
288              
289             =cut
290              
291             sub clone
292             {
293 0     0 1 0 my ($self) = @_;
294 0         0 my $copy;
295 0         0 foreach my $key (keys %$self)
296             {
297 0 0       0 if (ref $self->{$key} eq "ARRAY")
    0          
298             {
299 0         0 $copy->{$key} = [@{$self->{$key}}];
  0         0  
300             }
301             elsif (ref $self->{$key} eq "HASH")
302             {
303 0         0 $copy->{$key} = {%{$self->{$key}}};
  0         0  
304             }
305             else
306             {
307 0         0 $copy->{$key} = $self->{$key};
308             }
309             }
310 0         0 bless $copy, ref $self;
311 0         0 return $copy;
312             }
313              
314             =head2 positions
315              
316             Generates a hash describing the positions of the enzyme's recognition
317             sites in a nucleotide sequence. Keys are offset in nucleotides, and values are
318             the recognition site found at said offset as a string.
319            
320             =cut
321              
322             sub positions
323             {
324 29     29 1 44 my ($self, $seq) = @_;
325 29         44 my $total = {};
326 29         36 foreach my $sit (@{$self->{regex}})
  29         88  
327             {
328 46         2006 while ($seq =~ /(?=($sit))/ixg)
329             {
330 27         448 $total->{pos $seq} = $1;
331             }
332             }
333 29         104 return $total;
334             }
335              
336             =head2 overhang
337              
338             Given a nucleotide sequence context, what overhang does this enzyme leave, and
339             how far away from the cutsite is it?
340              
341             Arguments:
342              
343             =cut
344              
345             sub overhang
346             {
347 0     0 1 0 my ($self, $dna, $context, $strand) = @_;
348 0         0 my ($ohangstart, $mattersbit) = (0, q{});
349 0         0 my $lef;
350             my $rig;
351 0 0       0 if ($self->{class} eq "IIP")
    0          
352             {
353 0 0       0 ($lef, $rig) = (length($1), length($2)) if ($self->{cutseq} =~ $IIPreg);
354 0 0       0 ($lef, $rig) = ($rig, $lef) if ($rig < $lef);
355 0         0 $ohangstart = $lef + 1;
356 0         0 $mattersbit = substr($dna, $ohangstart-1, $rig-$lef);
357             }
358             elsif ($self->{class} eq "IIA")
359             {
360 0 0       0 ($lef, $rig) = ($1, $2) if ($self->{cutseq} =~ $IIAreg);
361 0 0       0 ($lef, $rig) = ($rig, $lef) if ($rig < $lef);
362 0 0       0 if ($strand == 1)
363             {
364 0         0 $ohangstart = length($dna) + $lef + 1;
365             }
366             else
367             {
368 0         0 $ohangstart = length($context) - length($dna) - $rig + 1;
369             }
370 0         0 $mattersbit = substr($context, $ohangstart-1, $rig-$lef);
371 0 0       0 $ohangstart = $strand == 1 ? length($dna) + $lef : 0 - ($rig);
372             }
373             else
374             {
375 0         0 return ();
376             }
377 0         0 return ($ohangstart, $mattersbit);
378             }
379              
380             =head2 display
381              
382             Generates a tab delimited display string that can be used to print enzyme
383             information out in a tabular format.
384              
385             =cut
386              
387             sub display
388             {
389 0     0 1 0 my ($self) = @_;
390 0 0       0 my $staract = $self->{staract} ? "*" : q{};
391 0         0 my (@blocked, @inhibed) = ((), ());
392 0 0       0 push @blocked, "cpg" if ($self->{methcpg} eq "blocked");
393 0 0       0 push @blocked, "dam" if ($self->{methdam} eq "blocked");
394 0 0       0 push @blocked, "dcm" if ($self->{methdcm} eq "blocked");
395 0 0       0 push @inhibed, "cpg" if ($self->{methcpg} eq "inhibited");
396 0 0       0 push @inhibed, "dam" if ($self->{methdam} eq "inhibited");
397 0 0       0 push @inhibed, "dcm" if ($self->{methdcm} eq "inhibited");
398 0         0 my $buffstr = undef;
399 0         0 foreach (sort keys %{$self->{buffers}})
  0         0  
400             {
401 0 0       0 $buffstr .= "$_ (" . $self->{buffers}->{$_} . ") " if ($self->{buffers}->{$_});
402             }
403 0         0 my $vendstr = join(", ", values %{$self->{vendors}});
  0         0  
404 0         0 my $display = undef;
405 0 0       0 my $inact = $self->{tempin} ? " (". $self->{timein} . q{@} . $self->{tempin} . ")" : q{};
406 0         0 $display .= $self->{id} . "\t";
407 0         0 $display .= $self->{cutseq} . $staract . "\t";
408 0         0 $display .= $self->{type} . "\t";
409 0 0       0 $display .= $self->{start} . "\t" if ($self->{start});
410 0         0 $display .= $self->{temp} . $inact . "\t";
411 0         0 $display .= join(", ", @blocked) . "\t";
412 0         0 $display .= join(", ", @inhibed) . "\t";
413 0         0 $display .= $self->{score} . "\t";
414 0         0 $display .= $buffstr . "\t";
415 0         0 $display .= $vendstr . "\t";
416 0         0 return $display;
417             }
418              
419             =head2 common_buffers
420              
421             Returns an array reference listing the buffers, if any, in which two enzymes
422             both have 100% activity. in boolean mode returns the number of buffers
423              
424             =cut
425              
426             sub common_buffers
427             {
428 0     0 1 0 my ($self, $buddy, $bool) = @_;
429 0 0       0 $self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme")
430             unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme");
431              
432 0         0 my $sbuffs = $self->{buffers};
433 0         0 my $bbuffs = $buddy->{buffers};
434 0         0 my @answer;
435 0         0 foreach my $skey (sort keys %{$sbuffs})
  0         0  
436             {
437 0         0 my $sval = $sbuffs->{$skey};
438 0         0 my $bval = $bbuffs->{$skey};
439 0 0 0     0 if ($skey eq "Other" && $sval && $bval && "$sval" eq "$bval")
    0 0        
      0        
      0        
      0        
      0        
440             {
441 0         0 push @answer, $skey;
442             }
443             elsif ($sval && $bval && "$sval" == 100 && "$bval" == 100)
444             {
445 0         0 push @answer, $skey;
446             }
447             }
448 0 0       0 return $bool ? scalar(@answer) : \@answer;
449             }
450              
451             =head2 acceptable_buffer
452              
453             Returns a buffer in which both enzymes will have at least a thresholded amount
454             of activity.
455              
456             =cut
457              
458             sub acceptable_buffer
459             {
460 0     0 1 0 my ($self, $buddy, $level) = @_;
461 0 0       0 $self->throw("Argument is not a Bio::GeneDesign::RestrictionEnzyme")
462             unless $buddy->isa("Bio::GeneDesign::RestrictionEnzyme");
463              
464 0   0     0 $level = $level || 75;
465 0         0 my $sbuffs = $self->{buffers};
466 0         0 my $bbuffs = $buddy->{buffers};
467 0         0 my %answers;
468 0         0 foreach my $skey (sort keys %{$sbuffs})
  0         0  
469             {
470 0         0 my $sval = $sbuffs->{$skey};
471 0         0 my $bval = $bbuffs->{$skey};
472 0 0 0     0 if ($skey eq "Other" && $sval && $bval && $sval == $bval)
    0 0        
      0        
      0        
      0        
      0        
473             {
474 0         0 $answers{$skey} = 200;
475             }
476             elsif ($sval && $bval && $sval >= $level && $bval >= $level)
477             {
478 0         0 $answers{$skey} = $sval + $bval;
479             }
480             }
481 0 0       0 my @keys = sort {$answers{$b} <=> $answers{$a} && $b cmp $a} keys %answers;
  0         0  
482 0 0       0 return scalar @keys ? $keys[0] : undef;
483             }
484              
485             =head2 units
486              
487             Returns the number of units needed to cleave some sequence
488              
489             =cut
490              
491             sub units
492             {
493 0     0 1 0 my ($self, @args) = @_;
494            
495 0         0 my ($buffer, $sequence) = $self->_rearrange([qw(buffer sequence)], @args);
496            
497            
498 0         0 my $poshsh = $self->positions($sequence);
499 0         0 my $count = scalar keys %{$poshsh};
  0         0  
500            
501 0         0 my $freq = $count / (length $sequence);
502            
503 0   0     0 my $aggr = $self->aggress() || .000001;
504 0         0 $aggr = 1 / $aggr;
505            
506 0   0     0 $buffer = $buffer || $self->acceptable_buffer($self, 100);
507 0   0     0 my $buff = $self->buffers->{$buffer} || 1;
508 0         0 my $jad = $buff / 100;
509 0 0       0 my $adj = $jad > 0 ? 1 / $jad : 0;
510            
511 0         0 my $units = sprintf("%.1f", $freq * $aggr * $adj);
512            
513 0         0 return $units;
514             }
515              
516              
517             =head1 FILTERING METHODS
518              
519             =head2 filter_by_sequence
520              
521             Arguments: an arrayref of string nucleotide sequences (may be ambiguous)
522             a flag indicating whether or not the sequences in the array are
523             required (1 means they must NOT match; default 0 means they must
524             match)
525            
526             Returns : 1 if the enzyme passes;
527             0 if the enzyme fails.
528              
529             =cut
530              
531             sub filter_by_sequence
532             {
533 0     0 1 0 my ($self, $arrref, $req) = @_;
534 0 0       0 $req = 0 if (! $req);
535 0         0 my $result = 1;
536 0         0 foreach my $seq (@$arrref)
537             {
538 0         0 my $regex = _regres($seq, 1);
539 0 0       0 if ($regex =~ /\[ X \]/x)
540             {
541 0         0 print "\tWARNING: Can't parse sequence $seq containing non-nucleotide "
542             . "characters - ignoring.\n";
543 0         0 next;
544             }
545 0 0 0     0 $result = 0 if ( $req == 1 && $self->{recseq} =~ $regex );
546 0 0 0     0 $result = 0 if ( $req == 0 && $self->{recseq} !~ $regex );
547             }
548 0         0 return $result;
549             }
550              
551             =head2 filter_by_score
552              
553             Arguments : a float
554              
555             Returns : 1 if the enzyme's score is less than or equal to the argument,
556             0 if the enzyme's score is higher.
557              
558             =cut
559              
560             sub filter_by_score
561             {
562 0     0 1 0 my ($self, $score) = @_;
563 0         0 my $result = 1;
564 0 0       0 $result = 0 if ($self->{score} > $score);
565 0         0 return $result;
566             }
567              
568             =head2 filter_by_vendor
569              
570             Arguments : an arrayref of vendor abbreviations; see vendor().
571              
572             Returns : 1 if the enzyme is supplied by any of the vendors queried,
573             0 else.
574              
575             =cut
576              
577             sub filter_by_vendor
578             {
579 0     0 1 0 my ($self, $vendlist) = @_;
580 0         0 my $result = 1;
581 0         0 my $flag = 0;
582 0         0 foreach my $vend (@$vendlist)
583             {
584 0 0       0 unless (exists($RE_vendors{$vend}))
585             {
586 0         0 print "\tWARNING: Can't parse vendor argument $vend - ignoring.\n";
587 0         0 next;
588             }
589 0 0       0 $flag++ if ( exists( $self->{vendors}->{$vend} ) );
590             }
591 0 0       0 $result = $flag == 0 ? 0 : 1;
592 0         0 return $result;
593             }
594              
595             =head2 filter_by_buffer_activity
596              
597             Arguments : a hashref of buffer thresholds; the key is the buffer name, the
598             value is an activity threshold.
599            
600             Returns : 1 if the enzyme meets all the buffer requirements,
601             0 else.
602              
603             =cut
604              
605             sub filter_by_buffer_activity
606             {
607 0     0 1 0 my ($self, $hshref) = @_;
608 0         0 my $result = 1;
609 0         0 my $rebuff = $self->{buffers};
610 0         0 foreach my $buff (keys %$hshref)
611             {
612 0         0 my $val = $hshref->{$buff};
613 0 0 0     0 $result = 0 if ( ! exists($rebuff->{$buff}) || $rebuff->{$buff} < $val );
614             }
615            
616 0         0 return $result;
617             }
618              
619             =head2 filter_by_dcm_sensitivity
620              
621             Arguments : an arrayref of sensitivity values; the key is the sensitivity
622             blocked, inhibited, or indifferent
623            
624             Returns : 1 if the enzyme meets the dcm sensitivity requirements,
625             0 else.
626            
627             =cut
628              
629             sub filter_by_dcm_sensitivity
630             {
631 0     0 1 0 my ($self, $arrref) = @_;
632 0         0 my $result = 1;
633 0         0 my %sensehsh;
634 0         0 foreach my $sense (@$arrref)
635             {
636 0 0 0     0 if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent")
      0        
637             {
638 0         0 $sense = lc $sense;
639 0         0 print "\tWARNING: Can't parse dcmsense argument $sense - ignoring.\n";
640 0         0 next;
641             }
642 0         0 $sensehsh{$sense}++;
643             }
644 0 0       0 $result = 0 unless ( exists($sensehsh{$self->{methdcm}}) );
645 0         0 return $result;
646             }
647              
648             =head2 filter_by_dam_sensitivity
649              
650             Arguments : an arrayref of sensitivity values; the key is the sensitivity
651             blocked, inhibited, or indifferent
652            
653             Returns : 1 if the enzyme meets the dam sensitivity requirements,
654             0 else.
655            
656             =cut
657              
658             sub filter_by_dam_sensitivity
659             {
660 0     0 1 0 my ($self, $arrref) = @_;
661 0         0 my $result = 1;
662 0         0 my %sensehsh;
663 0         0 foreach my $sense (@$arrref)
664             {
665 0 0 0     0 if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent")
      0        
666             {
667 0         0 $sense = lc $sense;
668 0         0 print "\tWARNING: Can't parse damsense argument $sense - ignoring.\n";
669 0         0 next;
670             }
671 0         0 $sensehsh{$sense}++;
672             }
673 0 0       0 $result = 0 unless ( exists($sensehsh{$self->{methdam}}) );
674 0         0 return $result;
675             }
676              
677             =head2 filter_by_cpg_sensitivity
678              
679             Arguments : an arrayref of sensitivity values; the key is the sensitivity
680             blocked, inhibited, or indifferent
681            
682             Returns : 1 if the enzyme meets the cpg sensitivity requirements,
683             0 else.
684            
685             =cut
686              
687             sub filter_by_cpg_sensitivity
688             {
689 0     0 1 0 my ($self, $arrref) = @_;
690 0         0 my $result = 1;
691 0         0 my %sensehsh;
692 0         0 foreach my $sense (@$arrref)
693             {
694 0 0 0     0 if ($sense ne "blocked" && $sense ne "inhibited" && $sense ne "indifferent")
      0        
695             {
696 0         0 $sense = lc $sense;
697 0         0 print "\tWARNING: Can't parse cpgsense argument $sense - ignoring.\n";
698 0         0 next;
699             }
700 0         0 $sensehsh{$sense}++;
701             }
702 0 0       0 $result = 0 unless ( exists($sensehsh{$self->{methcpg}}) );
703 0         0 return $result;
704             }
705              
706             =head2 filter_by_star_activity
707              
708             Arguments : 1 if star activity required, 0 else
709            
710             Returns : 1 if the enzyme meets the star activity requirements,
711             0 else.
712              
713             =cut
714              
715             sub filter_by_star_activity
716             {
717 0     0 1 0 my ($self, $star) = @_;
718 0         0 my $result = 1;
719 0 0       0 $star = 0 unless ($star);
720 0 0 0     0 $result = 0 if (($star && ! $self->{staract}) || (! $star && $self->{staract}));
      0        
      0        
721 0         0 return $result;
722             }
723              
724             =head2 filter_by_incubation_temperature
725              
726             Arguments : an arrayref of acceptable integer incubation temperatures
727            
728             Returns : 1 if the enzyme meets the incubation temperature requirements,
729             0 else.
730              
731             =cut
732              
733             sub filter_by_incubation_temperature
734             {
735 0     0 1 0 my ($self, $arrref) = @_;
736 0         0 my $result = 1;
737 0         0 my %temps;
738 0         0 foreach my $temp (@$arrref)
739             {
740 0 0 0     0 if ($temp !~ /\d/x || $temp <= 0)
741             {
742 0         0 print "\tWARNING: Can't parse incubation argument $temp - ignoring.\n";
743             }
744 0         0 $temps{$temp}++;
745             }
746 0 0       0 $result = 0 unless ( exists $temps{$self->{temp}});
747 0         0 return $result;
748             }
749              
750             =head2 filter_by_inactivation_temperature
751              
752             Arguments : an acceptable integer inactivation temperature maximum
753            
754             Returns : 1 if the enzyme meets the inactivation temperature requirement,
755             0 else.
756              
757             =cut
758              
759             sub filter_by_inactivation_temperature
760             {
761 0     0 1 0 my ($self, $temp) = @_;
762 0         0 my $result = 1;
763 0 0 0     0 if ($temp !~ /\d/x || $temp <= 0)
764             {
765 0         0 print "\tWARNING: Can't parse inactivation argument $temp - ignoring.\n";
766             }
767             else
768             {
769 0 0       0 $result = 0 if ($self->{tempin} > $temp);
770             }
771 0         0 return $result;
772             }
773              
774             =head2 filter_by_base_ambiguity
775              
776             Arguments : "nonNonly" if any non N bases are allowed; "ATCGonly" if only
777             A, T, C, or G are allowed
778            
779             Returns : 1 if the enzyme meets the ambiguous nucleotide requirement,
780             0 else.
781              
782             =cut
783              
784             sub filter_by_base_ambiguity
785             {
786 0     0 1 0 my ($self, $ambig) = @_;
787 0         0 my $result = 1;
788 0 0 0     0 if ($ambig ne "nonNonly" && $ambig ne "ATCGonly")
789             {
790 0         0 print "\tWARNING: Can't parse ambiguity argument $ambig - ignoring.\n";
791             }
792             else
793             {
794 0         0 my $ambregex;
795 0 0       0 $ambregex = qr/N/ if ($ambig eq "nonNonly");
796 0 0       0 $ambregex = $ambnt if ($ambig eq "ATCGonly");
797 0 0       0 $result = 0 unless ( $self->{recseq} =~ $ambregex );
798             }
799 0         0 return $result;
800             }
801              
802             =head2 filter_by_length
803              
804             Arguments : an arrayref of acceptable recognition site lengths
805            
806             Returns : 1 if the enzyme meets the recognition site length requirements,
807             0 else.
808              
809             =cut
810              
811             sub filter_by_length
812             {
813 0     0 1 0 my ($self, $arrref) = @_;
814 0         0 my $result = 1;
815 0         0 my %lens;
816 0         0 foreach my $len (@$arrref)
817             {
818 0 0 0     0 if ($len =~ /\D/x || $len <= 0)
819             {
820 0         0 print "\tWARNING: Can't parse length argument $len - ignoring.\n";
821 0         0 next;
822             }
823 0         0 $lens{$len}++;
824             }
825 0 0       0 $result = 0 unless ( exists $lens{length($self->{recseq})} );
826 0         0 return $result;
827             }
828              
829             =head2 filter_by_overhang_palindromy
830              
831             Arguments : an arrayref of acceptable overhang palindromys, from the list
832             pal (palindromic),
833             nonpal (nonpalindromic),
834             pnon (potentially nonpalindromic)
835            
836             Returns : 1 if the enzyme meets the palindromy requirements,
837             0 else.
838              
839             =cut
840              
841             sub filter_by_overhang_palindromy
842             {
843 0     0 1 0 my ($self, $arrref) = @_;
844 0         0 my $result = 1;
845 0         0 my %pals;
846 0         0 foreach my $pal (@$arrref)
847             {
848 0 0 0     0 if ($pal ne "pal" && $pal ne "pnon" && $pal ne "nonpal")
      0        
849             {
850 0         0 print "\tWARNING: Can't parse palindromy argument $pal - ignoring.\n";
851 0         0 next;
852             }
853 0         0 $pals{$pal}++;
854             }
855 0 0       0 $result = 0 unless (exists $pals{$self->{palindromy}});
856 0         0 return $result;
857             }
858              
859             =head2 filter_by_stickiness
860              
861             Arguments : an arrayref of acceptable overhang orientations, from the list
862             1 (single basepair overhang),
863             5 (five prime overhang),
864             3 (three prime overhang),
865             b (blunt ended)
866            
867             Returns : 1 if the enzyme meets the overhang requirements,
868             0 else.
869              
870             =cut
871              
872             sub filter_by_stickiness
873             {
874 0     0 1 0 my ($self, $arrref) = @_;
875 0         0 my $result = 1;
876 0         0 my %sticks;
877 0         0 foreach my $stick (@$arrref)
878             {
879 0 0 0     0 if ($stick ne "5" && $stick ne "3" && $stick ne "1" && $stick ne "b")
      0        
      0        
880             {
881 0         0 print "\tWARNING: Can't parse sticky argument $stick - ignoring.\n";
882 0         0 next;
883             }
884 0         0 $sticks{$stick}++;
885             }
886 0 0 0     0 $result = 0 if ($self->{onebpoverhang} && ! exists $sticks{1});
887 0         0 my $type = $self->{type};
888 0         0 $type =~ s/\'//xg;
889 0 0       0 $result = 0 unless (exists $sticks{$type});
890 0         0 return $result;
891             }
892              
893             =head1 ACCESSOR METHODS
894              
895             Methods for setting and accessing enzyme attributes
896              
897             =head2 id
898              
899             The name of the enzyme.
900              
901             =cut
902              
903             sub id
904             {
905 393314     393314 1 557684 my ($self) = @_;
906 393314         2242830 return $self->{'id'};
907             }
908              
909             =head2 display_name
910              
911             The name of the enzyme.
912              
913             =cut
914              
915             sub display_name
916             {
917 0     0 1 0 my ($self) = @_;
918 0         0 return $self->{'id'};
919             }
920              
921             =head2 score
922              
923             This attribute initially holds the price in dollars per unit of the enzyme
924             (2009 US Dollars) but can be used to hold any score or cost value.
925              
926             =cut
927              
928             sub score
929             {
930 0     0 1 0 my ($self, $value) = @_;
931 0 0       0 if (defined $value)
932             {
933 0         0 $self->{'score'} = $value;
934             }
935 0         0 return $self->{'score'};
936             }
937              
938             =head2 aggress
939              
940             Aggressiveness is the number of recognition sites in a template piece of DNA
941             (usually lambda, but sometimes adeno2, pBR322, pUC19, pXba, etc) over the total
942             length of that template piece of DNA. This number tells the manufacturer how
943             much enzyme to sell as a "unit" - the amount of enzyme required to fully digest
944             one microgram of template DNA under reaction conditions in an hour.
945              
946             =cut
947              
948             sub aggress
949             {
950 0     0 1 0 my ($self, $value) = @_;
951 0 0       0 if (defined $value)
952             {
953 0         0 $self->{'aggress'} = $value;
954             }
955 0         0 return $self->{'aggress'};
956             }
957              
958             =head2 len
959              
960             The length in bases of the recognition sequence (recseq).
961              
962             =cut
963              
964             sub len
965             {
966 0     0 1 0 my ($self) = @_;
967 0         0 return $self->{'length'};
968             }
969              
970             =head2 methcpg
971              
972             The effect of CpG methylation on the enzyme's efficacy.
973              
974             =cut
975              
976             sub methcpg
977             {
978 0     0 1 0 my ($self) = @_;
979 0         0 return $self->{'methcpg'};
980             }
981              
982             =head2 methdcm
983              
984             The effect of Dcm methylation on the enzyme's efficacy.
985              
986             =cut
987              
988             sub methdcm
989             {
990 0     0 1 0 my ($self) = @_;
991 0         0 return $self->{'methdcm'};
992             }
993              
994             =head2 methdam
995              
996             The effect of Dam methylation on the enzyme's efficacy.
997              
998             =cut
999              
1000             sub methdam
1001             {
1002 0     0 1 0 my ($self) = @_;
1003 0         0 return $self->{'methdam'};
1004             }
1005              
1006             =head2 buffers
1007              
1008             A hash reference where the keys are buffer names and the values are the activity
1009             level of the enzyme in that Buffer. Since most of the enzymes in the default
1010             GeneDesign list are NEB enzymes, this is usually full of NEB buffers.
1011              
1012             =cut
1013              
1014             sub buffers
1015             {
1016 0     0 1 0 my ($self) = @_;
1017 0         0 return $self->{'buffers'};
1018             }
1019              
1020             =head2 vendors
1021              
1022             A hash reference where the keys are abbreviations for and the values are names
1023             of vendors that stock the enzyme. These are read in from the enzyme file.
1024              
1025             B = Invitrogen
1026             C = Minotech
1027             E = Stratagene
1028             F = Thermo Scientific Fermentas
1029             I = SibEnzyme
1030             J = Nippon Gene Co.
1031             K = Takara
1032             M = Roche Applied Science
1033             N = New England Biolabs
1034             O = Toyobo Technologies
1035             Q = Molecular Biology Resources
1036             R = Promega
1037             S = Sigma Aldrich
1038             U = Bangalore Genei
1039             V = Vivantis
1040             X = EURx
1041             Y = CinnaGen
1042            
1043             =cut
1044              
1045             sub vendors
1046             {
1047 0     0 1 0 my ($self) = @_;
1048 0         0 return $self->{'vendors'};
1049             }
1050              
1051             =head2 tempin
1052              
1053             The temperature in degrees Celsius that deactivates the enzyme.
1054              
1055             =cut
1056              
1057             sub tempin
1058             {
1059 0     0 1 0 my ($self) = @_;
1060 0         0 return $self->{'tempin'};
1061             }
1062              
1063             =head2 timein
1064              
1065             The time required at inactivation temperature to deactivate the enzyme.
1066              
1067             =cut
1068              
1069             sub timein
1070             {
1071 0     0 1 0 my ($self) = @_;
1072 0         0 return $self->{'timein'};
1073             }
1074              
1075             =head2 temp
1076              
1077             Incubation temperature for the best enzyme activity, in degrees Celsius.
1078              
1079             =cut
1080              
1081             sub temp
1082             {
1083 0     0 1 0 my ($self) = @_;
1084 0         0 return $self->{'temp'};
1085             }
1086              
1087             =head2 recseq
1088              
1089             This attribute is the "clean" description of the enzyme's recognition sequence -
1090             that is, no information about cleavage site can be gained from this attribute.
1091             This is determined automatically from the cleavage string (cutseq) at
1092             instantiation.
1093              
1094             =cut
1095              
1096             sub recseq
1097             {
1098 0     0 1 0 my ($self) = @_;
1099 0         0 return $self->{'recseq'};
1100             }
1101              
1102             =head2 seq
1103              
1104             Synonym for recseq
1105              
1106             =cut
1107              
1108             sub seq
1109             {
1110 21     21 1 40 my ($self) = @_;
1111 21         131 return $self->{'recseq'};
1112             }
1113              
1114             =head2 cutseq
1115              
1116             This attribute is the string description of the enzyme's recognition sequence.
1117             It includes information about both the recognition and cleavage sites.
1118             See http://rebase.neb.com/rebase/rebrec.html for help interpreting this field.
1119              
1120             =cut
1121              
1122             sub cutseq
1123             {
1124 0     0 1 0 my ($self) = @_;
1125 0         0 return $self->{'cutseq'};
1126             }
1127              
1128             =head2 regex
1129              
1130             This attribute stores a set of regular expressions as an array reference to
1131             speed the search for recognition sites in sequence. The first entry in the
1132             arrayref is the regular expression representing the forward orientation of
1133             the recognition sequence; the second entry represents the reverse orientation
1134             and is only defined if the recognition site is nonpalindromic.
1135              
1136             This attribute is defined at instantiation.
1137              
1138             =cut
1139              
1140             sub regex
1141             {
1142 1     1 1 3 my ($self) = @_;
1143 1         5 return $self->{'regex'};
1144             }
1145              
1146             =head2 class
1147              
1148             Class describes the cutting behavior of an enzyme. The classes used by
1149             GeneDesign uses a generalized subset of the classes as described at Rebase - for
1150             the purposes of enzyme editing, three classes have so far proven to be enough.
1151             See http://rebase.neb.com/cgi-bin/sublist for the full description of enzyme
1152             classes.
1153              
1154             IIP : This enzyme has a symmetric target and a symmetric cleavage site; this
1155             usually means that the enzyme cleaves inside its own recognition site.
1156             This is not the same as overhang palindromy!
1157            
1158             IIA : This enzyme has an asymmetric recognition site and usually cleaves
1159             outside of it.
1160            
1161             IIB : This enzyme has one recognition site and two cleavage sites, one on
1162             either side of the recognition site, and thus cuts itself out of
1163             sequence.
1164              
1165             =cut
1166              
1167             sub class
1168             {
1169 0     0 1 0 my ($self) = @_;
1170 0         0 return $self->{'class'};
1171             }
1172              
1173             =head2 classex
1174              
1175             =cut
1176              
1177             sub classex
1178             {
1179 0     0 1 0 my ($self) = @_;
1180 0         0 return $self->{'classex'};
1181             }
1182              
1183             =head2 class_regexes
1184              
1185             Short cut to accessing class regular expressions
1186              
1187             =cut
1188              
1189             sub class_regexes
1190             {
1191 0     0 1 0 return {"IIP" => $IIPreg, "IIA" => $IIAreg, "IIB" => $IIBreg};
1192             }
1193              
1194             =head2 type
1195              
1196             Type describes the kind of overhang left by an enzyme. This is probably not a
1197             good use of the word type.
1198              
1199             Type may be 5', for a five prime overhang; 3', for a three prime overhang;
1200             or b for blunt ends.
1201              
1202             =cut
1203              
1204             sub type
1205             {
1206 0     0 1 0 my ($self) = @_;
1207 0         0 return $self->{'type'};
1208             }
1209              
1210              
1211             =head2 onebpoverhang
1212              
1213             One basepair overhangs can be harder to ligate than blunt ends. This attribute
1214             returns 1 if an enzyme leaves a 1bp overhang and 0 else.
1215              
1216             =cut
1217              
1218             sub onebpoverhang
1219             {
1220 0     0 1 0 my ($self) = @_;
1221 0         0 return $self->{'onebpoverhang'};
1222             }
1223              
1224             =head2 exclude
1225              
1226             Some enzymes share overlapping recognition sites. If you are trying to ensure
1227             the absence or uniqueness of a recognition site, you will want to be sure to
1228             exclude isoschizomers and neoschizomers from consideration elsewhere. The
1229             exclude attribute stores an array reference that lists the ids of neo- and
1230             isoschizomers - or any arbitrary enzyme that is incompatible with this enzyme -
1231             for easy lookup.
1232              
1233             =cut
1234              
1235             sub exclude
1236             {
1237 768     768 1 1553 my ($self, $value) = @_;
1238 768 50       2515 if (defined $value)
1239             {
1240 768 50       2925 $self->throw("$value is not a reference to an array")
1241             unless (ref $value eq "ARRAY");
1242 768         2841 $self->{'exclude'} = $value;
1243             }
1244 768         5616 return $self->{'exclude'};
1245             }
1246              
1247             =head2 palindromy
1248              
1249             Information about the overhang the enzyme leaves.
1250              
1251             pal = palindromic
1252             nonpal = nonpalindromic
1253             pnon = potentially nonpalindromic, or sometimes palindromic and sometimes
1254             nonpalindromic
1255             unknown = unknown
1256            
1257             =cut
1258              
1259             sub palindromy
1260             {
1261 0     0 1   my ($self) = @_;
1262 0           return $self->{'palindromy'};
1263             }
1264              
1265             =head2 staract
1266              
1267             1 if the enzyme exhibits star activity, 0 else
1268              
1269             =cut
1270              
1271             sub staract
1272             {
1273 0     0 1   my ($self) = @_;
1274 0           return $self->{'staract'};
1275             }
1276              
1277             =head2 start
1278              
1279             The offset in nucleotides of the enzymes recognition site in an ORF
1280              
1281             =cut
1282              
1283             sub start
1284             {
1285 0     0 1   my ($self, $value) = @_;
1286 0 0         if (defined $value)
1287             {
1288 0           $self->{'start'} = $value;
1289             }
1290 0           return $self->{'start'};
1291             }
1292              
1293             =head2 outside_cut
1294              
1295             =cut
1296              
1297             sub outside_cut
1298             {
1299 0     0 1   my ($self) = @_;
1300 0           return $self->{'outside_cut'};
1301             }
1302              
1303             =head2 inside_cut
1304              
1305             =cut
1306              
1307             sub inside_cut
1308             {
1309 0     0 1   my ($self) = @_;
1310 0           return $self->{'inside_cut'};
1311             }
1312              
1313             1;
1314              
1315             __END__
1316              
1317             =head1 COPYRIGHT AND LICENSE
1318              
1319             Copyright (c) 2013, GeneDesign developers
1320             All rights reserved.
1321              
1322             Redistribution and use in source and binary forms, with or without modification,
1323             are permitted provided that the following conditions are met:
1324              
1325             * Redistributions of source code must retain the above copyright notice, this
1326             list of conditions and the following disclaimer.
1327              
1328             * Redistributions in binary form must reproduce the above copyright notice, this
1329             list of conditions and the following disclaimer in the documentation and/or
1330             other materials provided with the distribution.
1331              
1332             * The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
1333             National Laboratory, the Department of Energy, and the GeneDesign developers may
1334             not be used to endorse or promote products derived from this software without
1335             specific prior written permission.
1336              
1337             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
1338             ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1339             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1340             DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
1341             INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1342             LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
1343             PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
1344             LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
1345             OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
1346             ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1347              
1348             =cut