| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # GeneDesign module for handling restriction enzymes | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 NAME | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | Bio::GeneDesign::RestrictionEnzymes | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 VERSION | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | Version 5.56 | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | GeneDesign functions for handling restriction enzymes | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | =head1 AUTHOR | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | Sarah Richardson | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | =cut | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | package Bio::GeneDesign::RestrictionEnzymes; | 
| 24 |  |  |  |  |  |  |  | 
| 25 | 11 |  |  | 11 |  | 68 | use Exporter; | 
|  | 11 |  |  |  |  | 127 |  | 
|  | 11 |  |  |  |  | 425 |  | 
| 26 | 11 |  |  | 11 |  | 58 | use Bio::GeneDesign::Basic qw(:GD); | 
|  | 11 |  |  |  |  | 20 |  | 
|  | 11 |  |  |  |  | 1800 |  | 
| 27 | 11 |  |  | 11 |  | 5053 | use Bio::GeneDesign::RestrictionEnzyme; | 
|  | 11 |  |  |  |  | 27 |  | 
|  | 11 |  |  |  |  | 334 |  | 
| 28 | 11 |  |  | 11 |  | 70 | use Carp; | 
|  | 11 |  |  |  |  | 24 |  | 
|  | 11 |  |  |  |  | 534 |  | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 11 |  |  | 11 |  | 60 | use strict; | 
|  | 11 |  |  |  |  | 20 |  | 
|  | 11 |  |  |  |  | 171 |  | 
| 31 | 11 |  |  | 11 |  | 51 | use warnings; | 
|  | 11 |  |  |  |  | 17 |  | 
|  | 11 |  |  |  |  | 366 |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | our $VERSION = 5.56; | 
| 34 |  |  |  |  |  |  |  | 
| 35 | 11 |  |  | 11 |  | 51 | use base qw(Exporter); | 
|  | 11 |  |  |  |  | 18 |  | 
|  | 11 |  |  |  |  | 6511 |  | 
| 36 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 37 |  |  |  |  |  |  | _define_sites | 
| 38 |  |  |  |  |  |  | _define_site_status | 
| 39 |  |  |  |  |  |  | _parse_enzyme_list | 
| 40 |  |  |  |  |  |  | $VERSION | 
| 41 |  |  |  |  |  |  | ); | 
| 42 |  |  |  |  |  |  | our %EXPORT_TAGS =  (GD => \@EXPORT_OK); | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | =head2 _define_sites | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | Generates a hash reference where the keys are enzyme names and the values are | 
| 47 |  |  |  |  |  |  | L<Bio::GeneDesign::RestrictionEnzyme> objects. | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | =cut | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | sub _define_sites | 
| 52 |  |  |  |  |  |  | { | 
| 53 | 3 |  |  | 3 |  | 8 | my ($file) = @_; | 
| 54 | 3 | 50 |  |  |  | 129 | open (my $REFILE, '<', $file) || croak ("Can't find $file!\n"); | 
| 55 | 3 |  |  |  |  | 15 | my $ref = do { local $/ = <$REFILE> }; | 
|  | 3 |  |  |  |  | 236 |  | 
| 56 | 3 |  |  |  |  | 34 | close $REFILE; | 
| 57 | 3 |  |  |  |  | 354 | my @data = split(m{\n}x, $ref); | 
| 58 | 3 |  |  |  |  | 11 | my %RES; | 
| 59 | 3 |  |  |  |  | 9 | my @lines = grep {$_ !~ m{^ \# }x} @data; | 
|  | 771 |  |  |  |  | 1343 |  | 
| 60 | 3 |  |  |  |  | 12 | foreach my $line (@lines) | 
| 61 |  |  |  |  |  |  | { | 
| 62 | 768 |  |  |  |  | 8775 | my ($name, $site, $temp, $inact, $buf1, $buf2, $buf3, $buf4, $bufu, $dam, | 
| 63 |  |  |  |  |  |  | $dcm, $cpg, $score, $star, $vendor, $aggress) = split("\t", $line); | 
| 64 | 768 |  |  |  |  | 4718 | my $buffhsh = {NEB1 => $buf1, NEB2 => $buf2, NEB3 => $buf3, | 
| 65 |  |  |  |  |  |  | NEB4 => $buf4, Other => $bufu}; | 
| 66 | 768 | 100 |  |  |  | 2109 | $star = undef unless ($star eq 'y'); | 
| 67 | 768 |  |  |  |  | 3355 | my $re = Bio::GeneDesign::RestrictionEnzyme->new( | 
| 68 |  |  |  |  |  |  | -id => $name, | 
| 69 |  |  |  |  |  |  | -cutseq => $site, | 
| 70 |  |  |  |  |  |  | -temp   => $temp, | 
| 71 |  |  |  |  |  |  | -tempin => $inact, | 
| 72 |  |  |  |  |  |  | -score  => $score, | 
| 73 |  |  |  |  |  |  | -methdam => $dam, | 
| 74 |  |  |  |  |  |  | -methdcm => $dcm, | 
| 75 |  |  |  |  |  |  | -methcpg => $cpg, | 
| 76 |  |  |  |  |  |  | -staract => $star, | 
| 77 |  |  |  |  |  |  | -vendors => $vendor, | 
| 78 |  |  |  |  |  |  | -buffers => $buffhsh, | 
| 79 |  |  |  |  |  |  | -aggress => $aggress | 
| 80 |  |  |  |  |  |  | ); | 
| 81 | 768 |  |  |  |  | 3811 | $RES{$re->{id}} = $re; | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  | #Make exclusion lists | 
| 84 | 3 |  |  |  |  | 47 | foreach my $re (values %RES) | 
| 85 |  |  |  |  |  |  | { | 
| 86 | 768 |  |  |  |  | 2192 | my $rid = $re->{id}; | 
| 87 | 768 |  |  |  |  | 1330 | my %excl; | 
| 88 | 768 |  |  |  |  | 8541 | foreach my $ar (sort grep {$_->{id} ne $rid} values %RES) | 
|  | 196608 |  |  |  |  | 678060 |  | 
| 89 |  |  |  |  |  |  | { | 
| 90 | 195840 |  |  |  |  | 233809 | foreach my $arreg (@{$ar->{regex}}) | 
|  | 195840 |  |  |  |  | 281257 |  | 
| 91 |  |  |  |  |  |  | { | 
| 92 | 246330 | 100 |  |  |  | 601946 | $excl{$ar->{id}}++ if ($re->{recseq} =~ $arreg) | 
| 93 |  |  |  |  |  |  | } | 
| 94 | 195840 |  |  |  |  | 235352 | foreach my $rereg (@{$re->{regex}}) | 
|  | 195840 |  |  |  |  | 272077 |  | 
| 95 |  |  |  |  |  |  | { | 
| 96 | 246330 | 100 |  |  |  | 632780 | $excl{$ar->{id}}++ if ($ar->{recseq} =~ $rereg) | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  | } | 
| 99 | 768 |  |  |  |  | 5197 | my @skips = sort keys %excl; | 
| 100 | 768 |  |  |  |  | 3242 | $re->exclude(\@skips); | 
| 101 |  |  |  |  |  |  | } | 
| 102 | 3 |  |  |  |  | 258 | return \%RES; | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | =head2 _define_site_status | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | Generates a hash describing the restriction count of a nucleotide sequence. | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | Arguments: nucleotide sequence as a string | 
| 110 |  |  |  |  |  |  | an arrayref of L<Bio::GeneDesign::RestrictionEnzyme> objects | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | Returns: reference to a hash where the keys are enzyme ids and the values are | 
| 113 |  |  |  |  |  |  | counts of their occurrences in the nucleotide sequence | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | =cut | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | sub _define_site_status | 
| 118 |  |  |  |  |  |  | { | 
| 119 | 1 |  |  | 1 |  | 3 | my ($seq, $RES) = @_; | 
| 120 | 1 |  |  |  |  | 3 | my $SITE_STATUS = {}; | 
| 121 | 1 |  |  |  |  | 2 | foreach my $re (@{$RES}) | 
|  | 1 |  |  |  |  | 3 |  | 
| 122 |  |  |  |  |  |  | { | 
| 123 | 29 |  |  |  |  | 60 | my $tmphsh = $re->positions($seq); | 
| 124 | 29 |  |  |  |  | 35 | $SITE_STATUS->{$re->id} = scalar keys %{$tmphsh}; | 
|  | 29 |  |  |  |  | 74 |  | 
| 125 |  |  |  |  |  |  | } | 
| 126 | 1 |  |  |  |  | 4 | return $SITE_STATUS; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | =head2 _parse_enzyme_list | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | =cut | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | sub _parse_enzyme_list | 
| 134 |  |  |  |  |  |  | { | 
| 135 | 3 |  |  | 3 |  | 12 | my ($path) = @_; | 
| 136 | 3 | 50 |  |  |  | 179 | open (my $REFILE, '<', $path) || croak ("Can't read $path!\n"); | 
| 137 | 3 |  |  |  |  | 10 | my $ref = do { local $/ = <$REFILE> }; | 
|  | 3 |  |  |  |  | 152 |  | 
| 138 | 3 |  |  |  |  | 42 | close $REFILE; | 
| 139 | 3 |  |  |  |  | 68 | my @list = split m{\s}x, $ref; | 
| 140 | 3 |  |  |  |  | 21 | return \@list; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | 1; | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | __END__ |