File Coverage

blib/lib/Bio/GeneDesign/Random.pm
Criterion Covered Total %
statement 41 68 60.2
branch 4 14 28.5
condition n/a
subroutine 10 12 83.3
pod n/a
total 55 94 58.5


line stmt bran cond sub pod time code
1             #
2             # GeneDesign libraries for Random DNA generation
3             #
4              
5             =head1 NAME
6              
7             GeneDesign::Random
8              
9             =head1 VERSION
10              
11             Version 5.52
12              
13             =head1 DESCRIPTION
14              
15             Random DNA Generators
16            
17             =head1 AUTHOR
18              
19             Sarah Richardson <SMRichardson@lbl.gov>
20              
21             =cut
22              
23             package Bio::GeneDesign::Random;
24             require Exporter;
25              
26 11     11   7174 use Bio::GeneDesign::Codons qw(_find_in_frame);
  11         39  
  11         1133  
27 11     11   78 use Bio::GeneDesign::Basic qw(@BASES %NTIDES);
  11         27  
  11         1617  
28 11     11   67 use List::Util qw(shuffle);
  11         24  
  11         859  
29              
30 11     11   68 use strict;
  11         22  
  11         329  
31 11     11   60 use warnings;
  11         18  
  11         569  
32              
33             our $VERSION = 5.52;
34              
35 11     11   63 use base qw(Exporter);
  11         30  
  11         9874  
36             our @EXPORT_OK = qw(
37             _randomDNA
38             _randombase
39             _randombase_weighted
40             _replace_ambiguous_bases
41             _weighted_rand
42             _random_index
43             );
44             our %EXPORT_TAGS = (GD => \@EXPORT_OK);
45              
46             =head1 Functions
47              
48             =head2 _randomDNA()
49              
50             takes a target length and a GC percentage and generates a random nucleotide
51             sequence, with or without stops in the first frame
52             in: nucleotide sequence length (scalar),
53             GC percentage (0 <= scalar <= 100),
54             stop codon prevention(0 stops allowed, else no stops),
55             codon table (hash reference)
56             out: nucleotide sequence (string)
57            
58             =cut
59              
60             sub _randomDNA
61             {
62 0     0   0 my ($len, $GCperc, $stopswit, $codon_t) = @_;
63              
64 0 0       0 return q{} if ($len == 0);
65 0 0       0 return _randombase_weighted($GCperc) if ($len == 1);
66              
67             #GC
68 0         0 my $GCtotal = sprintf "%.0f", $GCperc * $len / 100;
69 0         0 my $Gcount = sprintf "%.0f", rand( $GCtotal );
70 0         0 my $Gstr = 'G' x $Gcount;
71 0         0 my $Ccount = $GCtotal - $Gcount;
72 0         0 my $Cstr = 'C' x $Ccount;
73            
74             #AT
75 0         0 my $ATtotal = $len - $GCtotal;
76 0         0 my $Acount = sprintf "%.0f", rand( $ATtotal );
77 0         0 my $Astr = 'A' x $Acount;
78 0         0 my $Tcount = $ATtotal - $Acount;
79 0         0 my $Tstr = 'T' x $Tcount;
80            
81 0         0 my @randomarray = shuffle( split( '', $Gstr . $Cstr . $Astr . $Tstr) );
82 0         0 my $DNA = join('', @randomarray);
83            
84 0 0       0 if ($stopswit)
85             {
86 0         0 my $stoplist = _find_in_frame($DNA, "*", $codon_t);
87 0         0 foreach my $pos (@$stoplist)
88             {
89 0         0 my $bit = substr($DNA, (($pos + 1) * 3) - 3, 3);
90             #substr($DNA, (($pos + 1) * 3) - 3, 3) = scalar reverse($bit);
91 0         0 substr($DNA, (($pos + 1) * 3) - 3, 3, scalar reverse($bit));
92 0 0       0 if (int(rand(1)+.5) == 1)
93             {
94 0         0 my $bat = substr($DNA, (($pos + 1) * 3) - 2, 2);
95             #substr($DNA, (($pos + 1) * 3) - 2, 2) = scalar reverse($bat);
96 0         0 substr($DNA, (($pos + 1) * 3) - 2, 2, scalar reverse($bat));
97             }
98             }
99             }
100 0         0 return $DNA;
101             }
102              
103             =head2 _randombase()
104              
105             when you just want one random base
106            
107             =cut
108              
109             sub _randombase
110             {
111 0     0   0 my $int = _random_index(4);
112 0         0 return $BASES[$int];
113             }
114              
115             =head2 _random_weighted_base()
116              
117             when you just want one random but weighted base
118            
119             =cut
120              
121             sub _randombase_weighted
122             {
123 30000     30000   41280 my ($GCp) = @_;
124 30000 50       60461 return _randombase() unless ($GCp);
125            
126 30000         46004 my $GCcount = $GCp/200 ;
127 30000         44047 my $ATcount = (100-$GCp)/200;
128 30000         101368 my $weight = {G => $GCcount, C => $GCcount, T => $ATcount, A => $ATcount};
129 30000         55480 return _weighted_rand($weight);
130             }
131              
132             =head2 _weighted_rand()
133            
134             =cut
135              
136             sub _weighted_rand
137             {
138 130000     130000   161443 my ($dist) = @_;
139 130000 50       278211 croak ("no distribution provided") unless ($dist);
140 130000         135146 my ($key, $weight);
141              
142 130000         129871 while (1)
143             {
144 130052         162784 my $rand = rand;
145 130052         443642 while ( ($key, $weight) = each %$dist )
146             {
147 280365 100       1341933 return $key if ($rand -= $weight) < 0;
148             }
149             }
150 0         0 return;
151             }
152              
153             =head2 _replace_ambiguous_bases
154              
155             =cut
156              
157             sub _replace_ambiguous_bases
158             {
159 3     3   6 my ($seq) = @_;
160 3         6 my $new = q{};
161 3         12 foreach my $char (split(q{}, $seq))
162             {
163 26         28 my @class = @{$NTIDES{$char}};
  26         65  
164 26         51 my $index = _random_index(scalar(@class));
165 26         124 $new .= $class[$index];
166             }
167 3         13 return $new;
168             }
169              
170             =head2 _random_index
171              
172             =cut
173              
174             sub _random_index
175             {
176 100026     100026   122457 my ($array_size) = @_;
177 100026         332942 return (sprintf "%.0f", rand($array_size)) % $array_size;
178             }
179              
180             1;
181              
182             __END__
183              
184             =head1 COPYRIGHT AND LICENSE
185              
186             Copyright (c) 2013, GeneDesign developers
187             All rights reserved.
188              
189             Redistribution and use in source and binary forms, with or without modification,
190             are permitted provided that the following conditions are met:
191              
192             * Redistributions of source code must retain the above copyright notice, this
193             list of conditions and the following disclaimer.
194              
195             * Redistributions in binary form must reproduce the above copyright notice, this
196             list of conditions and the following disclaimer in the documentation and/or
197             other materials provided with the distribution.
198              
199             * The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
200             National Laboratory, the Department of Energy, and the GeneDesign developers may
201             not be used to endorse or promote products derived from this software without
202             specific prior written permission.
203              
204             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
205             ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
206             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
207             DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
208             INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
209             LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
210             PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
211             LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
212             OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
213             ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
214              
215             =cut