File Coverage

blib/lib/Bio/GeneDesign.pm
Criterion Covered Total %
statement 381 684 55.7
branch 117 368 31.7
condition 34 150 22.6
subroutine 50 76 65.7
pod 59 59 100.0
total 641 1337 47.9


line stmt bran cond sub pod time code
1             #
2             # GeneDesign engine
3             #
4              
5             =head1 NAME
6              
7             Bio::GeneDesign
8              
9             =head1 VERSION
10              
11             Version 5.52
12              
13             =head1 DESCRIPTION
14              
15             =head1 AUTHOR
16              
17             Sarah Richardson <smrichardson@lbl.gov>
18              
19             =cut
20              
21             package Bio::GeneDesign;
22 11     11   316051 use base qw(Bio::Root::Root);
  11         27  
  11         10399  
23              
24 11     11   657490 use Bio::GeneDesign::ConfigData;
  11         33  
  11         433  
25 11     11   6420 use Bio::GeneDesign::Basic qw(:GD);
  11         35  
  11         3295  
26 11     11   7111 use Bio::GeneDesign::CodonJuggle qw(:GD);
  11         36  
  11         2551  
27 11     11   67 use Bio::GeneDesign::Codons qw(:GD);
  11         21  
  11         3034  
28 11     11   9023 use Bio::GeneDesign::Oligo qw(:GD);
  11         28  
  11         1636  
29 11     11   69 use Bio::GeneDesign::Random qw(:GD);
  11         24  
  11         1654  
30 11     11   10365 use Bio::GeneDesign::RestrictionEnzymes qw(:GD);
  11         39  
  11         2169  
31 11     11   7662 use Bio::GeneDesign::ReverseTranslate qw(:GD);
  11         33  
  11         1588  
32 11     11   6377 use Bio::GeneDesign::PrefixTree;
  11         31  
  11         320  
33 11     11   77 use File::Basename;
  11         20  
  11         799  
34 11     11   12602 use Bio::SeqIO;
  11         506882  
  11         911  
35 11     11   133 use Carp;
  11         33  
  11         868  
36              
37 11     11   70 use strict;
  11         25  
  11         403  
38 11     11   68 use warnings;
  11         21  
  11         114502  
39              
40             my $VERSION = 5.52;
41            
42             =head1 CONSTRUCTORS
43              
44             =head2 new
45              
46             Returns an initialized Bio::GeneDesign object.
47            
48             This function reads the ConfigData written at installation, imports the
49             relevant sublibraries, and sets the relevant paths.
50            
51             my $GD = Bio::GeneDesign->new();
52            
53             =cut
54              
55             sub new
56             {
57 9     9 1 131 my ($class, @args) = @_;
58 9         141 my $self = $class->SUPER::new(@args);
59 9         129 bless $self, $class;
60            
61 9         132 $self->{script_path} = Bio::GeneDesign::ConfigData->config('script_path');
62 9         54 $self->{conf} = Bio::GeneDesign::ConfigData->config('conf_path');
63 9 50       71 $self->{conf} .= q{/} unless substr($self->{conf}, -1, 1) eq q{/};
64            
65 9         50 $self->{tmp_path} = Bio::GeneDesign::ConfigData->config('tmp_path');
66 9 50       79 $self->{tmp_path} .= q{/} unless substr($self->{tmp_path}, -1, 1) eq q{/};
67            
68 9         53 $self->{graph} = Bio::GeneDesign::ConfigData->config('graphing_support');
69 9 50       49 if ($self->{graph})
70             {
71 0         0 require Bio::GeneDesign::Graph;
72 0         0 import Bio::GeneDesign::Graph qw(:GD);
73             }
74            
75 9         50 $self->{EMBOSS} = Bio::GeneDesign::ConfigData->config('EMBOSS_support');
76 9 50       43 if ($self->{EMBOSS})
77             {
78 0         0 require Bio::GeneDesign::Palindrome;
79 0         0 import Bio::GeneDesign::Palindrome qw(:GD);
80             }
81            
82 9         44 $self->{BLAST} = Bio::GeneDesign::ConfigData->config('BLAST_support');
83 9 50       51 if ($self->BLAST)
84             {
85             #$ENV{BLASTPLUSDIR} = Bio::GeneDesign::ConfigData->config('blast_path');
86 0         0 require Bio::GeneDesign::Blast;
87 0         0 import Bio::GeneDesign::Blast qw(:GD);
88             }
89              
90 9         46 $self->{vmatch} = Bio::GeneDesign::ConfigData->config('vmatch_support');
91 9 50       120 if ($self->{vmatch})
92             {
93 0         0 require Bio::GeneDesign::Vmatch;
94 0         0 import Bio::GeneDesign::Vmatch qw(:GD);
95             }
96            
97 9         49 $self->{codon_path} = $self->{conf} . 'codon_tables/';
98 9         27 $self->{organism} = undef;
99 9         24 $self->{codontable} = undef;
100 9         26 $self->{enzyme_set} = undef;
101 9         26 $self->{version} = $VERSION;
102 9         24 $self->{amb_trans_memo} = {};
103            
104 9         42 return $self;
105             }
106              
107             =head1 ACCESSORS
108              
109             =cut
110              
111             =head2 EMBOSS
112              
113             returns a value if EMBOSS_support was vetted and approved during installation.
114              
115             =cut
116              
117             sub EMBOSS
118             {
119 0     0 1 0 my ($self) = @_;
120 0         0 return $self->{'EMBOSS'};
121             }
122              
123             =head2 BLAST
124              
125             returns a value if BLAST_support was vetted and approved during installation.
126              
127             =cut
128              
129             sub BLAST
130             {
131 9     9 1 27 my ($self) = @_;
132 9         45 return $self->{'BLAST'};
133             }
134              
135             =head2 graph
136              
137             returns a value if graphing_support was vetted and approved during installation.
138              
139             =cut
140              
141             sub graph
142             {
143 0     0 1 0 my ($self) = @_;
144 0         0 return $self->{'graph'};
145             }
146              
147             =head2 vmatch
148              
149             returns a value if vmatch_support was vetted and approved during installation.
150              
151             =cut
152              
153             sub vmatch
154             {
155 0     0 1 0 my ($self) = @_;
156 0         0 return $self->{'vmatch'};
157             }
158              
159             =head2 enzyme_set
160              
161             Returns a hash reference where the keys are enzyme names and the values are
162             L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> objects, if the enzyme
163             set has been defined.
164              
165             To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>.
166              
167             =cut
168              
169             sub enzyme_set
170             {
171 4     4 1 1237 my ($self) = @_;
172 4         52 return $self->{'enzyme_set'};
173             }
174              
175             =head2 enzyme_set_name
176              
177             Returns the name of the enzyme set in use, if there is one.
178              
179             To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>.
180              
181             =cut
182              
183             sub enzyme_set_name
184             {
185 0     0 1 0 my ($self) = @_;
186 0         0 return $self->{'enzyme_set_name'};
187             }
188              
189             =head2 all_enzymes
190              
191             Returns a hash reference where the keys are enzyme names and the values are
192             L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> objects
193              
194             To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>.
195              
196             =cut
197              
198             sub all_enzymes
199             {
200 0     0 1 0 my ($self) = @_;
201 0         0 return $self->{'all_enzymes'};
202             }
203              
204             =head2 organism
205              
206             Returns the name of the organism in use, if there is one.
207              
208             To set this value, use L<set_organism|/set_organism>.
209              
210             =cut
211              
212             sub organism
213             {
214 0     0 1 0 my ($self) = @_;
215 0         0 return $self->{'organism'};
216             }
217              
218             =head2 codontable
219              
220             Returns the codon table in use, if there is one.
221              
222             The codon table is a hash reference where the keys are upper case nucleotides
223             and the values are upper case single letter amino acids.
224              
225             my $codon_t = $GD->codontable();
226             $codon_t->{"ATG"} eq "M" || die;
227              
228             To set this value, use L<set_codontable|/set_codontable>.
229              
230             =cut
231              
232             sub codontable
233             {
234 1     1 1 718 my ($self) = @_;
235 1         11 return $self->{'codontable'};
236             }
237              
238             =head2 reversecodontable
239              
240             Returns the reverse codon table in use, if there is one.
241              
242             The reverse codon table is a hash reference where the keys are upper case single
243             letter amino acids and the values are upper case nucleotides.
244              
245             my $revcodon_t = $GD->reversecodontable();
246             $revcodon_t->{"M"} eq "ATG" || die;
247            
248             This value is set automatically when L<set_codontable|/set_codontable> is run.
249              
250             =cut
251              
252             sub reversecodontable
253             {
254 1     1 1 1722 my ($self) = @_;
255 1         5 return $self->{'reversecodontable'};
256             }
257              
258             =head2 rscutable
259              
260             Returns the RSCU table in use, if there is one.
261              
262             The RSCU codon table is a hash reference where the keys are upper case
263             nucleotides and the values are floats.
264              
265             my $rscu_t = $GD->rscutable();
266             $rscu_t->{"ATG"} eq 1.00 || die;
267              
268             To set this value, use L<set_rscu_table|/set_rscutable>.
269              
270             =cut
271              
272             sub rscutable
273             {
274 1     1 1 1767 my ($self) = @_;
275 1         6 return $self->{'rscutable'};
276             }
277              
278              
279             =head1 FUNCTIONS
280              
281             =cut
282              
283             =head2 melt
284              
285             my $Tm = $GD->melt(-sequence => $myseq);
286              
287             The -sequence argument is required.
288              
289             Returns the melting temperature of a DNA sequence.
290              
291             You can set the salt and DNA concentrations with the -salt and -concentration
292             arguments; they are 50mm (.05) and 100 pm (.0000001) respectively.
293              
294             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
295             object to be analyzed with the -sequence flag.
296              
297             There are four different formulae to choose from. If you wish to use the nearest
298             neighbor method, use the -nearest_neighbor flag. Otherwise the appropriate
299             formula will be determined by the length of your -sequence argument.
300              
301             For sequences under 14 base pairs:
302             Tm = (4 * #GC) + (2 * #AT).
303              
304             For sequences between 14 and 50 base pairs:
305             Tm = 100.5 + (41 * #GC / length) - (820 / length) + 16.6 * log10(salt)
306              
307             For sequences over 50 base pairs:
308             Tm = 81.5 + (41 * #GC / length) - (500 / length) + 16.6 * log10(salt) - .62;
309              
310             =cut
311              
312             sub melt
313             {
314 4     4 1 4230 my ($self, @args) = @_;
315              
316 4         20 my ($seq, $salt, $conc, $nnflag)
317             = $self->_rearrange([qw(
318             sequence
319             salt
320             concentration
321             nearest_neighbor)], @args);
322              
323 4 50       86 $self->throw("No sequence provided for the melt function")
324             unless ($seq);
325            
326 4 100       10 $nnflag = $nnflag ? 1 : 0;
327 4   50     17 $salt = $salt || .05;
328 4   50     13 $conc = $conc || .0000001;
329 4         8 my $str = $self->_stripdown($seq, q{}, 1);
330            
331 4 100       12 if ($nnflag)
332             {
333 1         5 return _ntherm($str, $salt, $conc);
334             }
335 3         10 return _melt($str, $salt, $conc);
336             }
337              
338             =head2 complement
339              
340             $my_seq = "AATTCG";
341            
342             my $complemented_seq = $GD->complement($my_seq);
343             $complemented_seq eq "TTAAGC" || die;
344            
345             my $reverse_complemented_seq = $GD->complement($my_seq, 1);
346             $reverse_complemented_seq eq "CGAATT" || die;
347            
348             #clean
349             my $complemented_seq = $GD->complement(-sequence => $my_seq);
350             $complemented_seq eq "TTAAGC" || die;
351            
352             my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq,
353             -reverse => 1);
354             $reverse_complemented_seq eq "CGAATT" || die;
355            
356              
357             The -sequence argument is required.
358              
359             Complements or reverse complements a DNA sequence.
360              
361             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
362             object to be processed.
363              
364             If you also pass along a true statement, the sequence will be reversed and
365             complemented.
366              
367             =cut
368              
369             sub complement
370             {
371 4     4 1 3474 my ($self, @args) = @_;
372              
373 4         22 my ($seq, $swit)
374             = $self->_rearrange([qw(
375             sequence
376             reverse)], @args);
377            
378 4 50       38 $self->throw("No sequence provided for the complement function")
379             unless $seq;
380              
381 4   100     14 $swit = $swit || 0;
382            
383 4         10 my $str = $self->_stripdown($seq, q{}, 1);
384              
385 4         15 return _complement($str, $swit);
386             }
387              
388             =head2 count
389              
390             $my_seq = "AATTCG";
391             my $count = $GD->count($my_seq);
392             $count->{C} == 1 || die;
393             $count->{G} == 1 || die;
394             $count->{A} == 2 || die;
395             $count->{GCp} == 33.3 || die;
396             $count->{ATp} == 66.7 || die;
397            
398             #clean
399             my $count = $GD->count(-sequence => $my_seq);
400              
401             You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
402             object.
403              
404             the count function counts the bases in a DNA sequence and returns a hash
405             reference where each base (including the ambiguous bases) are keys and the
406             values are the number of times they appear in the sequence. There are also the
407             special values GCp and ATp for GC and AT percentage.
408              
409             =cut
410              
411             sub count
412             {
413 2     2 1 2778 my ($self, @args) = @_;
414              
415 2         12 my ($seq) = $self->_rearrange([qw(sequence)], @args);
416            
417 2 50       23 $self->throw("No sequence provided for the count function")
418             unless ($seq);
419              
420              
421 2         6 my $str = $self->_stripdown($seq, q{}, 1);
422              
423 2         10 return _count($str);
424             }
425              
426             =head2 regex_nt
427            
428             my $my_seq = "ABC";
429             my $regex = $GD->regex_nt(-sequence => $my_seq);
430             # $regex is qr/A[CGT]C/;
431            
432             my $regarr = $GD->regex_nt(-sequence => $my_seq --reverse_complement => 1);
433             # $regarr is [qr/A[CGT]C/, qr/G[ACG]T/]
434            
435              
436             You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
437             object to be processed with the -sequence flag.
438              
439             regex_nt creates a compiled regular expression or a set of them that can be used
440             to query large nucleotide sequences for possibly ambiguous subsequences.
441              
442              
443             If you want to get regular expressions for both the forward and reverse senses
444             of the DNA, use the -reverse_complement flag and expect a reference to an array
445             of compiled regexes.
446              
447             =cut
448              
449             sub regex_nt
450             {
451 6     6 1 12372 my ($self, @args) = @_;
452            
453 6         38 my ($seq, $arrswit)
454             = $self->_rearrange([qw(
455             sequence
456             reverse_complement)], @args
457             );
458            
459 6 50       106 $self->throw("no sequence provided the regex_nt function")
460             unless ($seq);
461              
462 6         20 my $str = $self->_stripdown($seq, q{}, 1);
463            
464 6 100       18 if ($arrswit)
465             {
466 2         9 return _regarr($str);
467             }
468             else
469             {
470 4         21 return _regres($str, 1);
471             }
472             }
473              
474             =head2 regex_aa
475              
476             my $my_pep = "AEQ*";
477             my $regex = $GD->regex_aa(-sequence => $my_pep);
478             $regex == qr/AEQ[\*]/ || die;
479            
480             Creates a compiled regular expression or a set of them that can be used to query
481             large amino acid sequences for smaller subsequences.
482              
483             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
484             object to be processed with the -sequence flag.
485              
486             =cut
487              
488             sub regex_aa
489             {
490 1     1 1 714 my ($self, $seq) = @_;
491            
492 1 50       6 $self->throw("no sequence provided for the regex_aa function")
493             unless ($seq);
494              
495 1         4 my $str = $self->_stripdown($seq, q{}, 1);
496            
497 1         6 return _regres($str, 2);
498             }
499              
500             =head2 sequence_is_ambiguous
501              
502             my $my_seq = "ABC";
503             my $flag = $GD->sequence_is_ambiguous($my_seq);
504             $flag == 1 || die;
505            
506             $my_seq = "ATC";
507             $flag = $GD->sequence_is_ambiguous($my_seq);
508             $flag == 0 || die;
509            
510             Checks to see if a DNA sequence contains ambiguous bases (RYMKWSBDHVN) and
511             returns true if it does.
512              
513             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
514             object to be processed.
515              
516             =cut
517              
518             sub sequence_is_ambiguous
519             {
520 5     5 1 1599 my ($self, $seq) = @_;
521            
522 5 50       16 $self->throw("no sequence provided for the sequence_is_ambiguous function")
523             unless ($seq);
524              
525 5         15 my $str = $self->_stripdown($seq, q{}, 1);
526            
527 5         22 return _is_ambiguous($str);
528             }
529              
530             =head2 ambiguous_translation
531              
532             my $my_seq = "ABC";
533             my @peps = $GD->ambiguous_translation(-sequence => $my_seq, -frame => 1);
534             # @peps is qw(I T C)
535            
536             You must pass a string variable, a Bio::Seq object, or a Bio::SeqFeatureI object
537             to be processed.
538              
539             Translates a nucleotide sequence that may have ambiguous bases and returns an
540             array of possible peptides.
541              
542             The frame argument may be 1, 2, 3, -1, -2, or -3.
543             It may also be t (three, 1, 2, 3), or s (six, 1, 2, 3, -1, -2, -3).
544             It defaults to 1.
545              
546             =cut
547              
548             sub ambiguous_translation
549             {
550 4     4 1 10088 my ($self, @args) = @_;
551            
552 4         35 my ($seq, $frame)
553             = $self->_rearrange([qw(sequence frame)], @args);
554            
555 4 50       120 $self->throw("no sequence provided for the ambiguous_translation function")
556             unless ($seq);
557            
558 4   50     11 $frame = $frame || 1;
559            
560 4         9 my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t);
  32         75  
561            
562 4 50       18 $self->throw("Bad frame argument to ambiguous_translation")
563             unless (exists $ambtransswits{$frame});
564            
565 4         13 my $str = $self->_stripdown($seq, q{}, 1);
566            
567 4         22 return _amb_translation($str,
568             $self->{codontable},
569             $frame,
570             $self->{amb_trans_memo});
571             }
572              
573             =head2 ambiguous_transcription
574              
575             my $my_seq = "ABC";
576             my $seqs = $GD->ambiguous_transcription($my_seq);
577             # $seqs is [qw(ACC AGC ATC)]
578            
579             Deambiguates a nucleotide sequence that may have ambiguous bases and returns a
580             reference to a sorted array of possible unambiguous sequences.
581              
582             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
583             object to be processed.
584              
585             =cut
586              
587             sub ambiguous_transcription
588             {
589 1     1 1 1923 my ($self, $seq) = @_;
590            
591 1 50       4 $self->throw("no sequence provided for the ambiguous_transcription function")
592             unless ($seq);
593              
594 1         4 my $str = $self->_stripdown($seq, q{}, 1);
595            
596 1         7 return _amb_transcription($str);
597             }
598              
599             =head2 positions
600              
601             my $seq = "TGCTGACTGCAGTCAGTACACTACGTACGTGCATGAC";
602             my $seek = "CWC";
603            
604             my $positions = $GD->positions(-sequence => $seq,
605             -query => $seek);
606             # $positions is {18 => "CAC"}
607              
608             $positions = $GD->positions(-sequence => $seq,
609             -query => $seek,
610             -reverse_complement => 1);
611             # $positions is {18 => "CAC", 28 => "GTG"}
612            
613             Finds and returns all the positions and sequences of a potentially ambiguous
614             subsequence in a larger sequence. The reverse_complement flag is off by default.
615              
616             You can pass either string variables, Bio::Seq objects, or Bio::SeqFeatureI
617             objects as the sequence and query arguments; additionally you may pass a
618             L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> object as the query
619             argument.
620              
621             =cut
622              
623             sub positions
624             {
625 2     2 1 2749 my ($self, @args) = @_;
626            
627 2         12 my ($seq, $seek, $revcom)
628             = $self->_rearrange([qw(
629             sequence
630             query
631             reverse_complement)], @args);
632              
633 2 50       55 $self->throw("no sequence provided for the positions function")
634             unless ($seq);
635            
636 2 50       11 $self->throw("no query provided for the positions function")
637             unless ($seek);
638            
639              
640 2         7 my $base = $self->_stripdown($seq, q{}, 1);
641              
642 2         4 my $regarr = [];
643 2         3 my $query = $seek;
644 2         6 my $qref = ref($seek);
645 2 50       6 if ($qref)
646             {
647 0 0 0     0 $self->throw("object $qref is not a Bio::Seq or Bio::SeqFeature, or " .
      0        
648             "Bio::GeneDesign::RestrictionEnzyme object")
649             unless ($seek->isa("Bio::Seq")
650             || $seek->isa("Bio::SeqFeatureI")
651             || $seek->isa("Bio::GeneDesign::RestrictionEnzyme"));
652 0 0       0 if ($seek->isa("Bio::GeneDesign::RestrictionEnzyme"))
653             {
654 0         0 $regarr = $seek->regex;
655             }
656             else
657             {
658 0 0       0 $query = ref($seek->seq) ? $seek->seq->seq : $seek->seq;
659 0 0       0 $regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
660             }
661             }
662             else
663             {
664 2 50       18 $regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
665             }
666              
667 2         11 return _positions($base, $regarr);
668             }
669              
670             =head2 set_codontable
671              
672             # load a codon table from the GeneDesign configuration directory
673             $GD->set_codontable(-organism_name => "yeast");
674            
675             # load a codon table from an arbitrary path and catch it in a variable
676             my $codon_t = $GD->set_codontable(-organism_name => "custom",
677             -table_path => "/path/to/table.ct");
678            
679             The -organism_name argument is required.
680              
681             This function loads, sets, and returns a codon definition table. After it is run
682             the accessor L<codontable|/codontable> will return the hash reference that
683             represents the codon table.
684              
685             If no path is provided, the configuration directory /codon_tables is checked for
686             tables that match the provided organism name. If there is no table in that
687             directory, a warning will appear and the standard codon table will be
688             used.
689              
690             Any codon table that is missing a definition for a codon will cause a warning to
691             be issued. The table format for codon tables is
692              
693             # Standard genetic code
694             {TTT} = F
695             {TTC} = F
696             {TTA} = L
697             ...
698              
699             See L<NCBI's table|http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG1>
700              
701             =cut
702              
703             sub set_codontable
704             {
705 7     7 1 26 my ($self, @args) = @_;
706 7         45 my ($orgname, $table_path)
707             = $self->_rearrange([qw(
708             organism_name
709             table_path)], @args);
710              
711 7 50       181 $self->throw("No organsim name provided") unless ($orgname);
712 7         35 $self->{organism} = $orgname;
713            
714 7 50 33     196 $self->throw("$table_path does not exist")
715             if ($table_path && ! -e $table_path);
716            
717 7 50       31 if (! $table_path)
718             {
719 0         0 $table_path = $self->{codon_path} . $orgname . ".ct";
720 0 0       0 if (! -e $table_path)
721             {
722 0         0 warn "No Codon table for $orgname found. Using Standard values\n";
723 0         0 $table_path = $self->{codon_path} . "Standard.ct";
724             }
725             }
726              
727 7         92 $self->{codontable} = _parse_codon_file($table_path);
728 7         46 $self->{reversecodontable} = _reverse_codon_table($self->{codontable});
729 7         29 return $self->{codontable};
730             }
731              
732             =head2 set_rscutable
733              
734             # load a RSCU table from the GeneDesign configuration directory
735             $GD->set_rscutable(-organism_name => "yeast");
736            
737             # load an RSCU table from an arbitrary path and catch it in a variable
738             my $rscu_t = $GD->set_rscutable(-organism_name => "custom",
739             -table_path => "/path/to/table.rscu");
740            
741             The -organism_name argument is required.
742              
743             This function loads, sets, and returns an RSCU table. After it is run
744             the accessor L<rscutable|/rscutable> will return the hash reference that
745             represents the RSCU table.
746              
747             If no path is provided, the configuration directory /codon_tables is checked for
748             tables that match the provided organism name. If there is no table in that
749             directory, a warning will appear and the flat RSCU table will be used.
750              
751             Any RSCU table that is missing a definition for a codon will cause a warning to
752             be issued. The table format for RSCU tables is
753              
754             # Saccharomyces cerevisiae (Highly expressed genes)
755             # Nucleic Acids Res 16, 8207-8211 (1988)
756             {TTT} = 0.19
757             {TTC} = 1.81
758             {TTA} = 0.49
759             ...
760              
761             See L<Sharp et al. 1986|http://www.ncbi.nlm.nih.gov/pubmed/3526280>.
762              
763             =cut
764              
765             sub set_rscutable
766             {
767 7     7 1 27 my ($self, @args) = @_;
768 7         44 my ($orgname, $rscu_path)
769             = $self->_rearrange([qw(
770             organism_name
771             rscu_path)], @args);
772              
773 7 50       213 $self->throw("No organsim name provided") unless ($orgname);
774 7         22 $self->{organism} = $orgname;
775            
776 7 50 33     181 $self->throw("$rscu_path does not exist")
777             if ($rscu_path && ! -e $rscu_path);
778            
779 7 50       47 if (! $rscu_path)
780             {
781 0         0 $rscu_path = $self->{codon_path} . $orgname . ".rscu";
782 0 0       0 if (! -e $rscu_path)
783             {
784 0         0 warn "No RSCU table for $orgname found. Using Flat values\n";
785 0         0 $rscu_path = $self->{codon_path} . "Flat.rscu";
786             }
787             }
788              
789 7         34 $self->{rscutable} = _parse_codon_file($rscu_path);
790 7         29 return $self->{rscutable};
791             }
792              
793             =head2 set_organism
794              
795             # load both codon tables and RSCU tables simultaneously
796             $GD->set_organism(-organism_name => "yeast");
797            
798             # with arguments
799             $GD->set_organism(-organism_name => "custom",
800             -table_path => "/path/to/table.ct",
801             -rscu_path => "/path/to/table.rscu");
802            
803              
804             The -organism_name argument is required.
805              
806             This function is just a shortcut; it runs L<set_codontable/set_codontable> and
807             L<set_rscutable/set_rscutable>. See those functions for details.
808              
809             =cut
810              
811             sub set_organism
812             {
813 7     7 1 92 my ($self, @args) = @_;
814            
815 7         101 my ($orgname, $table_path, $rscu_path)
816             = $self->_rearrange([qw(
817             organism_name
818             table_path
819             rscu_path)], @args);
820            
821 7 50       311 $self->throw("No organsim name provided") unless ($orgname);
822 7         20 $self->{organism} = $orgname;
823            
824 7         50 $self->set_codontable(-organism_name => $orgname, -table_path => $table_path);
825 7         54 $self->set_rscutable(-organism_name => $orgname, -rscu_path=> $rscu_path);
826              
827 7         32 return;
828             }
829              
830             =head2 codon_count
831              
832             # count the codons in a list of sequences
833             my $tally = $GD->codon_count(-input => \@sequences);
834            
835             # add a gene to an existing codon count
836             $tally = $GD->codon_count(-input => $sequence,
837             -count => $tally);
838            
839             # add a list of Bio::Seq objects to an existing codon count
840             $tally = $GD->codon_count(-input => \@seqobjects,
841             -count => $tally);
842            
843             The -input argument is required and will take a string variable, a Bio::Seq
844             object, a Bio::SeqFeatureI object, or a reference to an array full of any
845             combination of those things.
846              
847             The codon_count function takes a set of sequences and counts how often each
848             codon appears in them. It returns a hash reference where the keys are upper case
849             nucleotide codons and the values are integers. If you pass a hash reference
850             containing codon counts with the -count argument, new counts will be added to
851             the old values.
852              
853             This function will warn you if non nucleotide codons are found.
854              
855             TODO: what about ambiguous codons?
856              
857             =cut
858              
859             sub codon_count
860             {
861 3     3 1 3912 my ($self, @args) = @_;
862              
863 3         19 my ($input, $count) = $self->_rearrange([qw(input count)], @args);
864            
865 3 50       51 $self->throw("no codon table has been defined")
866             unless $self->{codontable};
867            
868 3 50       11 $self->throw("no sequences were provided to count")
869             unless $input;
870              
871 3         11 my $list = $self->_stripdown($input, 'ARRAY', 0);
872            
873 3         18 my $cod_count = _codon_count($list,
874             $self->{codontable},
875             $count);
876            
877 3 50       14 warn("There are bad codons in codon count\n") if (exists $cod_count->{XXX});
878            
879 3         16 return $cod_count;
880             }
881              
882             =head2 generate_RSCU_table
883              
884             my $rscu_t = $GD->generate_RSCU_table(-sequences => \@list_of_sequences);
885            
886             The -sequences argument is required and will take a string variable, a Bio::Seq
887             object, a Bio::SeqFeatureI object, or a reference to an array full of any
888             combination of those things.
889              
890             The generate_RSCU_table function takes a set of sequences, counts how often each
891             codon appears, and returns an RSCU table as a hash reference where the keys are
892             upper case nucleotide codons and the values are floats.
893              
894             See L<Sharp et al. 1986|http://www.ncbi.nlm.nih.gov/pubmed/3526280>.
895              
896             =cut
897              
898             sub generate_RSCU_table
899             {
900 1     1 1 1742 my ($self, @args) = @_;
901              
902 1         7 my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
903              
904 1 50       11 $self->throw("no codon table has been defined")
905             unless $self->{codontable};
906              
907 1 50       6 $self->throw("Sequence set must be provided")
908             unless ($seqobjs);
909            
910 1         5 return _generate_RSCU_table(
911             $self->codon_count(-input => $seqobjs),
912             $self->{codontable},
913             $self->{reversecodontable}
914             );
915             }
916              
917             =head2 generate_codon_report
918              
919             my $report = $GD->generate_codon_report(-sequences => \@list_of_sequences);
920              
921             The report will have the format
922              
923             TTT (F) 12800 0.74
924             TTC (F) 21837 1.26
925             TTA (L) 4859 0.31
926             TTG (L) 18806 1.22
927              
928             where the first column in each group is the codon, the second column is the one
929             letter amino acid abbreviation in parentheses, the third column is the number of
930             times that codon has been seen, and the fourth column is the RSCU value for that
931             codon.
932              
933             This report comes in a 4x4 layout, as would a standard genetic code table in a
934             textbook.
935              
936             NO TEST
937              
938             =cut
939              
940             sub generate_codon_report
941             {
942 0     0 1 0 my ($self, @args) = @_;
943              
944 0         0 my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
945              
946 0 0       0 $self->throw("no codon table has been defined")
947             unless $self->{codontable};
948              
949 0 0       0 $self->throw("Sequence set must be provided")
950             unless ($seqobjs);
951            
952 0         0 my $count_t = $self->codon_count(-input => $seqobjs);
953            
954 0         0 my $rscu_t = _generate_RSCU_table(
955             $count_t,
956             $self->{codontable},
957             $self->{reversecodontable}
958             );
959            
960 0         0 my $report = _generate_codon_report(
961             $count_t,
962             $self->{codontable},
963             $rscu_t
964             );
965            
966 0         0 return $report;
967             }
968              
969             =head2 generate_RSCU_file
970              
971             my $contents = $GD->generate_RSCU_file(
972             -sequences => \@seqs,
973             -comments => ["Got these codons from mice"]
974             );
975             open (my $OUT, '>', '/path/to/cods') || die "can't write to /path/to/cods";
976             print $OUT $contents;
977             close $OUT;
978              
979             This function generates a string that can be written to file to serve as a
980             GeneDesign RSCU table. Provide a set of sequences and an optional array
981             reference of comments to prepend to the file.
982              
983             The file will have the format
984             # Comment 1
985             # ...
986             # Comment n
987             {TTT} = 0.19
988             {TTC} = 1.81
989             ...
990              
991             NO TEST
992              
993             =cut
994              
995             sub generate_RSCU_file
996             {
997 0     0 1 0 my ($self, @args) = @_;
998            
999 0         0 my ($seqobjs, $comments) = $self->_rearrange([qw(sequences comments)], @args);
1000              
1001 0 0       0 $self->throw('No codon table has been defined')
1002             unless $self->{codontable};
1003              
1004 0 0       0 $self->throw('Sequence set must be provided')
1005             unless ($seqobjs);
1006              
1007 0 0       0 $self->throw('Comment argument must be array reference')
1008             unless ref($comments) eq 'ARRAY';
1009            
1010 0         0 my $count_t = $self->codon_count(-input => $seqobjs);
1011            
1012 0         0 my $rscu_t = _generate_RSCU_table(
1013             $count_t,
1014             $self->{codontable},
1015             $self->{reversecodontable}
1016             );
1017            
1018 0         0 my $report = _generate_codon_file(
1019             $rscu_t,
1020             $self->{reversecodontable},
1021             $comments
1022             );
1023            
1024 0         0 return $report;
1025             }
1026              
1027             =head2 list_enzyme_sets
1028              
1029             my @available_enzlists = $GD->list_enzyme_sets();
1030             # @available_enzlists == ('standard_and_IIB', 'blunts', 'IIB', 'nonpal', ...)
1031              
1032             Returns an array containing the names of every restriction enzyme recognition
1033             list GeneDesign knows about.
1034              
1035             =cut
1036              
1037             sub list_enzyme_sets
1038             {
1039 0     0 1 0 my ($self) = @_;
1040 0         0 my $epath = $self->{conf} . 'enzymes/';
1041 0         0 my @sets = ();
1042 0 0       0 opendir (my $ENZDIR, $epath) || $self->throw("can't opendir $epath");
1043 0         0 foreach my $list (readdir($ENZDIR))
1044             {
1045 0         0 my $name = basename($list);
1046 0         0 $name =~ s{\.[^.]+$}{}x;
1047 0 0 0     0 next if ($name eq q{.} || $name eq q{..});
1048 0         0 push @sets, $name;
1049             }
1050 0         0 closedir($ENZDIR);
1051 0         0 return @sets;
1052             }
1053              
1054             =head2 set_restriction_enzymes
1055            
1056             $GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1057              
1058             or
1059              
1060             $GD->set_restriction_enzymes(-list_path => '/path/to/enzyme_file');
1061              
1062             or even
1063              
1064             $GD->set_restriction_enzymes(
1065             -list_path => '/path/to/enzyme_file',
1066             -enzyme_set => 'custom_enzymes'
1067             );
1068              
1069             All will return a hash structure full of restriction enzymes.
1070              
1071             Tell GeneDesign which set of restriction enzymes to use. If you provide only a
1072             set name with the -enzyme_set flag, GeneDesign will check its config path for a
1073             matching file. Otherwise you must provide a path to a file (and optionally a
1074             name for the set).
1075            
1076             =cut
1077              
1078             sub set_restriction_enzymes
1079             {
1080 3     3 1 675 my ($self, @args) = @_;
1081            
1082 3         18 my ($set_name, $set_path)
1083             = $self->_rearrange([qw(enzyme_set list_path)], @args);
1084            
1085 3         70 my $def = 'all_enzymes';
1086 3         13 my $defpath = $self->{conf} . 'enzymes/' . $def;
1087 3         22 $self->{all_enzymes} = _define_sites($defpath);
1088            
1089            
1090 3 50 33     347 if (! $set_name && ! $set_path)
    50 33        
    50          
1091             {
1092 0         0 $self->{enzyme_set} = $self->{all_enzymes};
1093 0         0 $self->{enzyme_set_name} = $def;
1094             }
1095             elsif ($set_name && ! $set_path)
1096             {
1097 0         0 $set_path = $self->{conf} . "enzymes/$set_name";
1098 0 0       0 $self->throw("No enzyme set found that matches $set_name\n")
1099             unless (-e $set_path);
1100 0         0 my $list = _parse_enzyme_list($set_path);
1101 0         0 my $set = {};
1102 0         0 foreach my $id (@$list)
1103             {
1104 0 0       0 if (! exists $self->{all_enzymes}->{$id})
1105             {
1106 0         0 warn "$id was not recognized as an enzyme... skipping\n";
1107 0         0 next;
1108             }
1109 0         0 $set->{$id} = $self->{all_enzymes}->{$id};
1110             }
1111 0         0 $self->{enzyme_set} = $set;
1112 0         0 $self->{enzyme_set_name} = $set_name;
1113             }
1114             elsif ($set_path)
1115             {
1116 3 50       139 $self->throw("No enzyme list found at $set_path\n")
1117             unless (-e $set_path);
1118 3         23 my $list = _parse_enzyme_list($set_path);
1119 3         10 my $set = {};
1120 3         13 foreach my $id (@$list)
1121             {
1122 87 50       309 if (! exists $self->{all_enzymes}->{$id})
1123             {
1124 0         0 warn "$id was not recognized as an enzyme... skipping\n";
1125 0         0 next;
1126             }
1127 87         295 $set->{$id} = $self->{all_enzymes}->{$id};
1128             }
1129 3         15 $self->{enzyme_set} = $set;
1130 3   33     250 $self->{enzyme_set_name} = $set_name || basename($set_path);
1131             }
1132 3         24 return $self->{enzyme_set};
1133             }
1134              
1135             =head2 remove_from_enzyme_set
1136              
1137             Removes a subset of enzymes from an enzyme list. This only happens in memory, no
1138             files will be altered. The argument is an array reference of enzyme names.
1139              
1140             $GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1141             $GD->remove_from_enzyme_set(-enzymes => ['SmaI', 'MlyI']);
1142              
1143             NO TEST
1144              
1145             =cut
1146              
1147             sub remove_from_enzyme_set
1148             {
1149 0     0 1 0 my ($self, @args) = @_;
1150            
1151 0         0 my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1152            
1153 0 0       0 return unless ($enzes);
1154              
1155 0 0       0 $self->throw("no enzyme set has been defined")
1156             unless $self->{enzyme_set};
1157            
1158 0         0 my @toremove = ();
1159 0         0 my $ref = ref ($enzes);
1160 0 0       0 if ($ref eq "ARRAY")
1161             {
1162 0         0 push @toremove, @$enzes;
1163             }
1164             else
1165             {
1166 0         0 push @toremove, $enzes;
1167             }
1168 0         0 foreach my $enz (@toremove)
1169             {
1170 0         0 my $eid = $enz;
1171 0         0 my $eref = ref $enz;
1172 0 0       0 if ($eref)
1173             {
1174 0 0       0 $self->throw("object in enzymes is not a " .
1175             "Bio::GeneDesign::RestrictionEnzyme object")
1176             unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1177 0         0 $eid = $enz->id;
1178             }
1179              
1180 0 0       0 if (exists $self->{enzyme_set}->{$eid})
1181             {
1182 0         0 delete $self->{enzyme_set}->{$eid};
1183             }
1184             }
1185 0         0 return;
1186             }
1187              
1188             =head2 add_to_enzyme_set
1189              
1190             Adds a subset of enzymes to an enzyme list. This only happens in memory, no
1191             files will be altered. The argument is an array reference of RestrictionEnzyme
1192             objects.
1193              
1194             #Grab all known enzymes
1195             my $allenz = $GD->set_restriction_enzymes(-enzyme_set => 'standard_and_IIB');
1196              
1197             #Pull out a few
1198             my @keepers = ($allenz->{'BmrI'}, $allenz->{'HphI'});
1199              
1200             #Give GeneDesign the enzyme set you want
1201             $GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1202              
1203             #Add the few enzymes it didn't have before
1204             $GD->add_to_enzyme_set(-enzymes => \@keepers);
1205              
1206             NO TEST
1207              
1208             =cut
1209              
1210             sub add_to_enzyme_set
1211             {
1212 0     0 1 0 my ($self, @args) = @_;
1213            
1214 0         0 my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1215            
1216 0 0       0 return unless ($enzes);
1217              
1218 0 0       0 $self->throw("no enzyme set has been defined")
1219             unless $self->{enzyme_set};
1220            
1221 0         0 my @toadds = ();
1222 0         0 my $ref = ref ($enzes);
1223 0 0       0 if ($ref eq "ARRAY")
1224             {
1225 0         0 push @toadds, @$enzes;
1226             }
1227             else
1228             {
1229 0         0 push @toadds, $enzes;
1230             }
1231 0         0 foreach my $enz (@toadds)
1232             {
1233 0 0       0 $self->throw("object in enzymes is not a " .
1234             "Bio::GeneDesign::RestrictionEnzyme object")
1235             unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1236              
1237 0 0       0 next if (exists $self->{enzyme_set}->{$enz->id});
1238 0         0 $self->{enzyme_set}->{$enz->id} = $enz;
1239             }
1240 0         0 return;
1241             }
1242              
1243             =head2 restriction_status
1244              
1245             =cut
1246              
1247             sub restriction_status
1248             {
1249 1     1 1 68837 my ($self, @args) = @_;
1250            
1251 1         13 my ($seq) = $self->_rearrange([qw(sequence)], @args);
1252              
1253 1 50       18 $self->throw("no enzyme set has been defined")
1254             unless $self->{enzyme_set};
1255            
1256 1 50       6 $self->throw("No arguments provided for set_restriction_enzymes")
1257             unless ($seq);
1258            
1259 1         7 my $str = $self->_stripdown($seq, q{}, 0);
1260 1         5 my @reslist = values %{$self->{enzyme_set}};
  1         9  
1261 1         7 return _define_site_status($str, \@reslist);
1262             }
1263              
1264             =head2 build_prefix_tree
1265              
1266             Take an array reference of nucleotide sequences (they can be strings, Bio::Seq
1267             objects, or Bio::GeneDesign::RestrictionEnzyme objects) and create a suffix
1268             tree. If you add the peptide flag, the sequences will be ambiguously translated
1269             before they are added to the suffix tree. Otherwise they will be ambiguously
1270             transcribed. It will add the reverse complement of any non peptide sequence as
1271             long as the reverse complement is different.
1272              
1273             my $tree = $GD->build_prefix_tree(-input => ['GGATCC']);
1274            
1275             my $ptree = $GD->build_prefix_tree(
1276             -input => ['GGCCNNNNNGGCC'],
1277             -peptide => 1
1278             );
1279            
1280             =cut
1281              
1282             sub build_prefix_tree
1283             {
1284 2     2 1 7013 my ($self, @args) = @_;
1285            
1286 2         19 my ($list, $pep) = $self->_rearrange([qw(input peptide)], @args);
1287              
1288 2 50       63 $self->throw("no input provided")
1289             unless ($list);
1290              
1291 2         16 my $tree = Bio::GeneDesign::PrefixTree->new();
1292            
1293 2         6 foreach my $seq (@$list)
1294             {
1295 10         23 my $base = $seq;
1296 10         15 my $id = $seq;
1297 10         25 my $ref = ref($seq);
1298 10 50       24 if ($ref)
1299             {
1300 10 50 33     183 $self->throw("object in input is not a Bio::Seq, Bio::SeqFeature, or " .
      33        
1301             "Bio::GeneDesign::RestrictionEnzyme object")
1302             unless ($seq->isa("Bio::Seq")
1303             || $seq->isa("Bio::SeqFeatureI")
1304             || $seq->isa("Bio::GeneDesign::RestrictionEnzyme")
1305             );
1306 10 50       42 $base = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
1307 10         45 $id = $seq->id;
1308             }
1309            
1310 10 100       25 if ($pep)
1311             {
1312 4 50       17 $self->throw('No codon table has been defined')
1313             unless $self->{codontable};
1314            
1315 4         22 my @fpeptides = _amb_translation($base, $self->{codontable},
1316             't', $self->{amb_trans_memo});
1317 4         28 $tree->add_prefix($_, $id, $base) foreach (@fpeptides);
1318            
1319 4         24 my $esab = _complement($base, 1);
1320 4         13 my $lagcheck = $esab;
1321 4         26 while (substr($lagcheck, -1) eq "N")
1322             {
1323 0         0 $lagcheck = substr($lagcheck, 0, length($lagcheck) - 1);
1324             }
1325 4 100 66     38 if ($esab ne $base && $lagcheck eq $esab)
1326             {
1327 2         14 my @rpeptides = _amb_translation($esab, $self->{codontable},
1328             't', $self->{amb_trans_memo});
1329 2         30 $tree->add_prefix($_, $id, $esab) foreach (@rpeptides);
1330             }
1331             }
1332             else
1333             {
1334 6         24 my $fnucs = _amb_transcription($base);
1335 6         33 $tree->add_prefix($_, $id, undef) foreach (@$fnucs);
1336            
1337 6         22 my $esab = _complement($base, 1);
1338 6 100       31 if ($esab ne $base)
1339             {
1340 3         10 my $rnucs = _amb_transcription($esab);
1341 3         18 $tree->add_prefix($_, $id, undef) foreach (@$rnucs);
1342             }
1343             }
1344             }
1345 2         12 return $tree;
1346             }
1347              
1348             =head2 search_prefix_tree
1349              
1350             Takes a suffix tree and a sequence and searches for results, which are returned
1351             as in the Bio::GeneDesign::PrefixTree documentation.
1352              
1353             my $hits = $GD->search_prefix_tree(-tree => $ptree, -sequence => $mygeneseq);
1354            
1355             # @$hits = (['BamHI', 4, 'GGATCC', 'i hope this didn't pop up'],
1356             # ['OhnoI', 21, 'GGCCC', 'I hope these pop up'],
1357             # ['WoopsII', 21, 'GGCCC', 'I hope these pop up']
1358             #);
1359              
1360             =cut
1361              
1362             sub search_prefix_tree
1363             {
1364 2     2 1 186216 my ($self, @args) = @_;
1365            
1366 2         21 my ($tree, $seq) = $self->_rearrange([qw(tree sequence)], @args);
1367            
1368 2 50       93 $self->throw("no query sequence provided")
1369             unless ($seq);
1370            
1371 2 50       9 $self->throw("no suffix tree provided")
1372             unless ($tree);
1373            
1374 2 50       17 $self->throw("tree is not a Bio::GeneDesign::PrefixTree")
1375             unless ($tree->isa("Bio::GeneDesign::PrefixTree"));
1376            
1377 2         13 my $str = $self->_stripdown($seq, q{}, 0);
1378            
1379 2         16 my @hits = $tree->find_prefixes($str);
1380            
1381 2         19 return \@hits;
1382             }
1383              
1384             =head2 pattern_aligner
1385              
1386             =cut
1387              
1388             sub pattern_aligner
1389             {
1390 3     3 1 5578 my ($self, @args) = @_;
1391            
1392 3         21 my ($seq, $pattern, $peptide, $re)
1393             = $self->_rearrange([qw(sequence pattern peptide offset)], @args);
1394              
1395 3 50       100 $self->throw("no codon table has been defined")
1396             unless $self->{codontable};
1397            
1398 3 50       9 $self->throw("no nucleotide sequence provided")
1399             unless $seq;
1400              
1401 3   100     12 $re = $re || 0;
1402            
1403 3         12 my $str = $self->_stripdown($seq, q{}, 1);
1404            
1405 3   33     20 $peptide = $peptide || $self->translate(-sequence => $str);
1406            
1407 3         16 my ($newpattern, $offset) = _pattern_aligner($str,
1408             $pattern,
1409             $peptide,
1410             $self->{codontable},
1411             $self->{amb_trans_memo}
1412             );
1413 3 100       19 return $re ? ($newpattern, $offset) : $newpattern;
1414             }
1415              
1416             =head2 pattern_adder
1417              
1418             =cut
1419              
1420             sub pattern_adder
1421             {
1422 1     1 1 1917 my ($self, @args) = @_;
1423            
1424 1         8 my ($seq, $pattern) = $self->_rearrange([qw(sequence pattern)], @args);
1425              
1426 1 50       32 $self->throw("no codon table has been defined")
1427             unless $self->{codontable};
1428            
1429 1 50       6 $self->throw("no nucleotide sequence provided")
1430             unless $seq;
1431              
1432 1         5 my $str = $self->_stripdown($seq, q{}, 1);
1433 1         4 my $pat = $self->_stripdown($pattern, q{}, 1);
1434            
1435 1         9 my $newsequence = _pattern_adder($str,
1436             $pat,
1437             $self->{codontable},
1438             $self->{reversecodontable},
1439             $self->{amb_trans_memo}
1440             );
1441 1         6 return $newsequence;
1442             }
1443              
1444             =head2 codon_change_type
1445              
1446             =cut
1447              
1448             sub codon_change_type
1449             {
1450 0     0 1 0 my ($self, @args) = @_;
1451            
1452 0         0 my ($codold, $codnew) = $self->_rearrange([qw(from to)], @args);
1453              
1454 0 0       0 $self->throw("no codon table has been defined")
1455             unless $self->{codontable};
1456            
1457 0 0       0 $self->throw("no from sequence provided")
1458             unless $codold;
1459            
1460 0 0       0 $self->throw("no to sequence provided")
1461             unless $codnew;
1462            
1463 0         0 my $changetype = _codon_change_type($codold, $codnew, $self->{codontable});
1464 0         0 return $changetype;
1465             }
1466              
1467             =head2 translate
1468              
1469             =cut
1470              
1471             sub translate
1472             {
1473 9     9 1 5374 my ($self, @args) = @_;
1474            
1475 9         53 my ($seq, $frame) = $self->_rearrange([qw(sequence frame)], @args);
1476              
1477 9 50       242 $self->throw("no codon table has been defined")
1478             unless $self->{codontable};
1479            
1480 9 50       24 $self->throw("no nucleotide sequence provided")
1481             unless $seq;
1482            
1483 9         31 my $str = $self->_stripdown($seq, q{}, 1);
1484            
1485 9 100       23 $frame = $frame ? $frame : 1;
1486            
1487 9 50 33     52 $self->throw("frame must be -3, -2, -1, 1, 2, or 3")
1488             if (abs($frame) > 4 || abs($frame) < 0);
1489              
1490 9         43 my $peptide = _translate($str, $frame, $self->{codontable});
1491 9 100       28 if (ref $seq)
1492             {
1493 6         30 my $newobj = $seq->clone();
1494 6 50       308 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1495 6         86 $desc = "translated with " . $self->{organism} . " codon values";
1496 6         18 $newobj->seq($peptide);
1497 6         428 $newobj->alphabet("protein");
1498 6         129 $newobj->desc($desc);
1499 6         78 return $newobj;
1500             }
1501             else
1502             {
1503 3         43 return $peptide;
1504             }
1505             }
1506              
1507             =head2 reverse_translate
1508              
1509             =cut
1510              
1511             sub reverse_translate
1512             {
1513 20001     20001 1 1778649 my ($self, @args) = @_;
1514            
1515 20001         85178 my ($pep, $algorithm)
1516             = $self->_rearrange([qw(
1517             peptide
1518             algorithm)], @args);
1519              
1520 20001 50       525176 $self->throw("no codon table has been defined")
1521             unless $self->{codontable};
1522            
1523 20001 50       49745 $self->throw("no RSCU table has been defined")
1524             unless $self->{rscutable};
1525            
1526 20001 50       42707 $self->throw("no peptide sequence provided")
1527             unless $pep;
1528              
1529 20001         47898 my $str = $self->_stripdown($pep, q{}, 1);
1530            
1531 20001         61284 my ($newstr, @baddies) = _sanitize($str, 'pep');
1532 20001         33322 $str = $newstr;
1533 20001 50       46631 if (scalar @baddies)
1534             {
1535 0         0 print "\nGD_WARNING: removed bad characters (", join q{, }, @baddies;
1536 0         0 print ") from input sequence\n";
1537             }
1538 20001   50     43918 $algorithm = $algorithm || "balanced";
1539 20001         29745 $algorithm = lc $algorithm;
1540 20001         31328 $algorithm =~ s{\;}{}xg;
1541 20001         35669 my $name = "_reversetranslate" . "_" . $algorithm;
1542 20001         46919 my $subref = \&$name;
1543 20001   33     78231 my $seq = &$subref($self->{reversecodontable}, $self->{rscutable}, $str)
1544             || $self->throw("Can't reverse translate with $algorithm? $!");
1545            
1546 20001 50       51611 if (ref $pep)
1547             {
1548 20001         68642 my $newobj = $pep->clone();
1549 20001 50       708335 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1550 20001         289412 $desc .= "$algorithm reverse translated with " . $self->{organism};
1551 20001         29493 $desc .= " RSCU values";
1552 20001         58405 $newobj->seq($seq);
1553 20001         1319029 $newobj->desc($desc);
1554 20001         266311 return $newobj;
1555             }
1556             else
1557             {
1558 0         0 return $seq;
1559             }
1560             }
1561              
1562             =head2 codon_juggle
1563              
1564             =cut
1565              
1566             sub codon_juggle
1567             {
1568 20003     20003 1 932845 my ($self, @args) = @_;
1569            
1570 20003         85621 my ($seq, $algorithm)
1571             = $self->_rearrange([qw(
1572             sequence
1573             algorithm)], @args);
1574              
1575 20003 50       505492 $self->throw("no codon table has been defined")
1576             unless $self->{codontable};
1577            
1578 20003 50       44796 $self->throw("no RSCU table has been defined")
1579             unless $self->{rscutable};
1580              
1581 20003 50       36430 $self->throw("no nucleotide sequence provided")
1582             unless $seq;
1583              
1584              
1585 20003 50       33675 $self->throw("no algorithm provided")
1586             unless $algorithm;
1587            
1588 20003         43961 my $str = $self->_stripdown($seq, q{}, 0);
1589              
1590             ##REPLACE THIS WITH CODE THAT GRACEFULLY JUGGLES JUST CDSES OR GENES
1591 20003 50       51569 $self->throw("sequence does not appear to be the right length to be a gene")
1592             unless length($str) % 3 == 0;
1593              
1594 20003         29712 $algorithm = lc $algorithm;
1595 20003         34055 $algorithm =~ s/\W//xg;
1596 20003         31153 my $name = "_codonJuggle_" . $algorithm;
1597 20003         43082 my $subref = \&$name;
1598 20003   33     77284 my $newseq = &$subref($self->{codontable},
1599             $self->{reversecodontable},
1600             $self->{rscutable},
1601             $str
1602             ) || $self->throw("Can't run $algorithm? $!");
1603 20003 50       49303 if (ref $seq)
1604             {
1605 20003         74644 my $newobj = $seq->clone();
1606 20003 50       698613 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1607 20003         283854 $desc .= "$algorithm codon juggled with " . $self->{organism};
1608 20003         30913 $desc .= " RSCU values";
1609 20003         56485 $newobj->seq($newseq);
1610 20003         1369061 $newobj->desc($desc);
1611 20003         262637 return $newobj;
1612             }
1613             else
1614             {
1615 0         0 return $newseq;
1616             }
1617             }
1618              
1619             =head2 subtract_sequence
1620              
1621             =cut
1622              
1623             sub subtract_sequence
1624             {
1625 3     3 1 2660 my ($self, @args) = @_;
1626              
1627 3         19 my ($seq, $rem) = $self->_rearrange([qw(sequence remove)], @args);
1628              
1629 3 50       80 $self->throw("no codon table has been defined")
1630             unless $self->{codontable};
1631            
1632 3 50       13 $self->throw("no RSCU table has been defined")
1633             unless $self->{rscutable};
1634              
1635 3 50       10 $self->throw("no nucleotide sequence provided")
1636             unless $seq;
1637            
1638 3 50       12 $self->throw("no sequence to be removed was defined")
1639             unless ($rem);
1640              
1641 3         14 my $str = $self->_stripdown($seq, q{}, 0);
1642            
1643 3         6 my $regarr;
1644 3         6 my $less = $rem;
1645 3 50       11 if (ref($rem))
1646             {
1647 3 100 66     37 if ($rem->isa("Bio::Seq") || $rem->isa("Bio::SeqFeatureI"))
    50          
1648             {
1649 2 50       7 $less = ref($rem->seq) ? $rem->seq->seq : $rem->seq;
1650 2         95 $regarr = _regarr($less, 1);
1651             }
1652             elsif ($rem->isa("Bio::GeneDesign::RestrictionEnzyme"))
1653             {
1654 1         11 $less = $rem->seq;
1655 1         8 $regarr = $rem->regex;
1656             }
1657             else
1658             {
1659 0         0 $self->throw("removal argument is not a Bio::Seq, Bio::SeqFeature, or "
1660             . "Bio::GeneDesign::RestrictionEnzyme");
1661             }
1662             }
1663             else
1664             {
1665 0         0 $regarr = _regarr($less, 1);
1666             }
1667            
1668 3         30 my $newseq = _subtract( uc $str,
1669             uc $less,
1670             $regarr,
1671             $self->{codontable},
1672             $self->{rscutable},
1673             $self->{reversecodontable}
1674             );
1675            
1676 3 50       15 if (ref $seq)
1677             {
1678 3         29 my $newobj = $seq->clone();
1679 3 50       171 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1680 3         70 $desc .= $rem->id . " subtracted with " . $self->{organism};
1681 3         43 $desc .= " RSCU values";
1682 3         14 $newobj->seq($newseq);
1683 3         278 $newobj->desc($desc);
1684 3         57 return $newobj;
1685             }
1686             else
1687             {
1688 0         0 return $newseq;
1689             }
1690             }
1691              
1692             =head2 repeat_smash
1693              
1694             =cut
1695              
1696             sub repeat_smash
1697             {
1698 0     0 1 0 my ($self, @args) = @_;
1699            
1700 0         0 my ($seq) = $self->_rearrange([qw(sequence)], @args);
1701              
1702 0 0       0 $self->throw("no codon table has been defined")
1703             unless $self->{codontable};
1704            
1705 0 0       0 $self->throw("no RSCU table has been defined")
1706             unless $self->{rscutable};
1707              
1708 0 0       0 $self->throw("no nucleotide sequence provided")
1709             unless $seq;
1710            
1711 0         0 my $str = $self->_stripdown($seq, q{}, 0);
1712            
1713 0         0 return _minimize_local_alignment_dp(
1714             $str,
1715             $self->{codontable},
1716             $self->{reversecodontable},
1717             $self->{rscutable}
1718             );
1719             }
1720              
1721             =head2 make_amplification_primers
1722              
1723             NO TEST
1724              
1725             =cut
1726              
1727             sub make_amplification_primers
1728             {
1729 0     0 1 0 my ($self, @args) = @_;
1730            
1731 0         0 my ($seq, $temp) = $self->_rearrange([qw(sequence temperature)], @args);
1732              
1733 0 0       0 $self->throw("no sequence provided") unless ($seq);
1734 0   0     0 $temp = $temp || 60;
1735            
1736 0         0 my $str = $self->_stripdown($seq, q{}, 0);
1737            
1738 0         0 return _make_amplification_primers($str, $temp);
1739             }
1740              
1741             =head2 contains_homopolymer
1742              
1743             Returns 1 if the sequence contains a homopolymer of the provided length (default
1744             is 5bp) and 0 else
1745              
1746             =cut
1747              
1748             sub contains_homopolymer
1749             {
1750 0     0 1 0 my ($self, @args) = @_;
1751            
1752 0         0 my ($seq, $length) = $self->_rearrange([qw(sequence length)], @args);
1753              
1754 0 0       0 $self->throw("no sequence provided") unless ($seq);
1755 0   0     0 $length = $length || 5;
1756            
1757 0         0 my $str = $self->_stripdown($seq, q{}, 0);
1758            
1759 0         0 return _check_for_homopolymer($str, $length);
1760             }
1761              
1762             =head2 filter_homopolymers
1763              
1764             =cut
1765              
1766             sub filter_homopolymers
1767             {
1768 0     0 1 0 my ($self, @args) = @_;
1769            
1770 0         0 my ($seqs, $length) = $self->_rearrange([qw(sequences length)], @args);
1771              
1772 0 0       0 $self->throw("no argument provided to filter_homopolymers")
1773             unless $seqs;
1774 0   0     0 $length = $length || 5;
1775              
1776 0         0 my $arrref = $self->_stripdown($seqs, 'ARRAY', 1);
1777            
1778 0         0 my $seqarr = _filter_homopolymer( $arrref, $length);
1779 0         0 return $seqarr;
1780             }
1781              
1782             =head2 search_vmatch
1783              
1784             =cut
1785              
1786             sub search_vmatch
1787             {
1788 0     0 1 0 my ($self, @args) = @_;
1789            
1790 0 0       0 $self->throw("vmatch searching is not available")
1791             unless $self->vmatch;
1792              
1793 0         0 my ($parent, $seqobj, $mismatches, $revcom)
1794             = $self->_rearrange([qw(
1795             parent
1796             sequence
1797             mismatches
1798             revcom)], @args);
1799              
1800 0 0       0 $self->throw("no nucleotide sequence provided")
1801             unless $seqobj;
1802              
1803 0 0       0 $self->throw("sequence argument is not a Bio::Seq object")
1804             unless $seqobj->isa("Bio::Seq");
1805            
1806 0 0       0 $self->throw("$seqobj is not a nucleotide sequence")
1807             unless $seqobj->alphabet eq "dna";
1808            
1809 0 0       0 $self->throw("no parent sequence provided")
1810             unless $parent;
1811              
1812 0 0       0 $self->throw("parent argument is not a Bio::Seq object")
1813             unless $parent->isa("Bio::Seq");
1814              
1815 0 0       0 $self->throw("$parent is not a nucleotide sequence")
1816             unless $parent->alphabet eq "dna";
1817            
1818 0   0     0 my $writedir = $self->{tmp_path} || "./";
1819 0   0     0 $mismatches = $mismatches || 1;
1820 0   0     0 $revcom = $revcom || 1;
1821              
1822 0         0 my $hits = _search_vmatch( $parent,
1823             $seqobj,
1824             $mismatches,
1825             $revcom,
1826             $writedir);
1827 0         0 return $hits;
1828             }
1829              
1830             =head2 filter_blast
1831              
1832             =cut
1833              
1834             sub filter_blast
1835             {
1836 0     0 1 0 my ($self, @args) = @_;
1837            
1838 0 0       0 $self->throw("BLAST+ filtering is not available")
1839             unless $self->BLAST;
1840              
1841 0         0 my ($parent, $seqobjs, $percent, $revcom)
1842             = $self->_rearrange([qw(
1843             parent
1844             sequences
1845             percent_identity
1846             revcom)], @args);
1847              
1848 0 0       0 $self->throw("no nucleotide sequences provided")
1849             unless $seqobjs;
1850              
1851 0 0       0 $self->throw("sequences argument is not an array reference")
1852             unless ref($seqobjs) eq "ARRAY";
1853            
1854 0         0 foreach my $seqobj (@$seqobjs)
1855             {
1856 0 0       0 $self->throw(ref($seqobj) . " is not a Bio::Seq object")
1857             unless $seqobj->isa("Bio::Seq");
1858              
1859 0 0       0 $self->throw("$seqobj is not a nucleotide sequence")
1860             unless $seqobj->alphabet eq "dna";
1861             }
1862            
1863 0 0       0 $self->throw("no parent sequence provided")
1864             unless $parent;
1865              
1866 0 0       0 $self->throw(ref($parent) . " is not a Bio::Seq object")
1867             unless $parent->isa("Bio::Seq");
1868              
1869 0 0       0 $self->throw("$parent is not a nucleotide sequence")
1870             unless $parent->alphabet eq "dna";
1871            
1872 0   0     0 my $writedir = $self->{tmp_path} || "./";
1873 0   0     0 $percent = $percent || 75;
1874 0   0     0 $revcom = $revcom || 1;
1875              
1876 0         0 my $seqarr = _filter_blast( $parent,
1877             $seqobjs,
1878             $percent,
1879             $writedir);
1880 0         0 return $seqarr;
1881             }
1882              
1883             =head2 carve_building_blocks
1884              
1885             NO TEST
1886              
1887             =cut
1888              
1889             sub carve_building_blocks
1890             {
1891 0     0 1 0 my ($self, @args) = @_;
1892 0         0 my ($chobj, $algorithm, $tarbblen, $maxbblen,
1893             $minbblen, $tarbblap, $stitch, $excludes,
1894             $fpexcludes, $tpexcludes, $verbose)
1895             = $self->_rearrange([qw(
1896             sequence
1897             algorithm
1898             target_length
1899             max_length
1900             min_length
1901             overlap_length
1902             stitch_temp
1903             excludes
1904             fpexcludes
1905             tpexcludes
1906             verbose)], @args);
1907              
1908 0 0       0 $self->throw("no nucleotide sequence provided")
1909             unless $chobj;
1910              
1911 0 0       0 $self->throw("object " . ref($chobj) . " is not a Bio::Seq")
1912             unless $chobj->isa("Bio::Seq");
1913              
1914 0   0     0 $tarbblen = $tarbblen || 1000;
1915 0   0     0 $maxbblen = $maxbblen || 1023;
1916 0   0     0 $minbblen = $minbblen || 200;
1917              
1918 0 0 0     0 $self->throw("Target building block size $tarbblen is outside of allowable " .
1919             "range min $minbblen to max $maxbblen")
1920             if ($tarbblen < $minbblen || $tarbblen > $maxbblen);
1921            
1922 0 0 0     0 $self->throw("Minimum and maximum building block sizes don't make sense")
      0        
1923             if ($maxbblen < $minbblen || $maxbblen < 0 || $minbblen < 0 );
1924            
1925 0   0     0 $tarbblap = $tarbblap || 40;
1926            
1927 0   0     0 $algorithm = $algorithm || "overlap";
1928 0         0 $algorithm =~ s/\;//xg;
1929 0         0 my $module = "Bio::GeneDesign::BuildingBlocks::$algorithm";
1930 0         0 (my $require_name = $module . ".pm") =~ s{::}{/}xg;
1931             my $req = eval
1932 0         0 {
1933 0         0 require $require_name;
1934             };
1935 0 0       0 if (! $req)
1936             {
1937 0         0 $self->throw("BuildingBlocks module for $algorithm not found\n$@");
1938             }
1939 0         0 my $name = $module . "::_carve_building_blocks";
1940 0         0 my $subref = \&$name;
1941 0         0 my $BB_list = [];
1942             my $run = eval
1943 0         0 {
1944 0         0 $BB_list = &$subref( $self, $chobj, $tarbblen, $maxbblen,
1945             $minbblen, $tarbblap, $stitch,
1946             $excludes, $fpexcludes, $tpexcludes,
1947             $verbose
1948             );
1949             };
1950 0         0 my $e;
1951 0 0       0 if ($e = Bio::GeneDesign::Exception::UnBBable->caught())
1952             {
1953 0         0 print $chobj->id . " cannot be carved into building blocks: "
1954             . $e->error . "\n";
1955             }
1956 0         0 return $BB_list;
1957             }
1958              
1959             =head2 chop_oligos
1960              
1961             NO TEST
1962              
1963             =cut
1964              
1965             sub chop_oligos
1966             {
1967 0     0 1 0 my ($self, @args) = @_;
1968            
1969 0         0 my ($bbobj, $algorithm, $olilenmin, $olilenmax, $olilen, $laptemp, $laplenmin,
1970             $tmtol, $poolsize, $poolnummax, $verbose)
1971             = $self->_rearrange([qw(
1972             building_block
1973             algorithm
1974             oligo_len_min
1975             oligo_len_max
1976             oligo_len
1977             overlap_tm
1978             overlap_len_min
1979             tm_tolerance
1980             pool_size
1981             pool_num_max
1982             verbose)], @args);
1983              
1984 0 0       0 $self->throw("no building block provided")
1985             unless $bbobj;
1986              
1987 0 0       0 $self->throw("object " . ref($bbobj) . " is not a Bio::SeqFeatureI")
1988             unless $bbobj->isa("Bio::SeqFeatureI");
1989              
1990 0   0     0 $olilen = $olilen || 180;
1991 0   0     0 $olilenmin = $olilenmin || 45;
1992 0   0     0 $olilenmax = $olilenmax || 200;
1993 0   0     0 $laptemp = $laptemp || 70;
1994 0   0     0 $tmtol = $tmtol || .5;
1995 0   0     0 $verbose = $verbose || undef;
1996              
1997 0 0 0     0 $self->throw("maximum oligo length is less than the target oligo length")
      0        
1998             if ($olilenmax && $olilen && $olilenmax < $olilen);
1999            
2000 0   0     0 $algorithm = $algorithm || "JGI";
2001 0         0 $algorithm =~ s/\;//xg;
2002 0         0 my $module = "Bio::GeneDesign::Oligos::$algorithm";
2003 0         0 (my $require_name = $module . ".pm") =~ s{::}{/}xg;
2004             my $req = eval
2005 0         0 {
2006 0         0 require $require_name;
2007             };
2008 0 0       0 if (! $req)
2009             {
2010 0         0 $self->throw("Oligos module for algorithm $algorithm not found:\n$@\n\n");
2011             }
2012 0         0 my $name = $module . "::_chop_oligos";
2013 0         0 my $subref = \&$name;
2014 0         0 my $OL_list = [];
2015             my $run = eval
2016 0         0 {
2017 0         0 $OL_list = &$subref( $self, $bbobj, $olilenmax, $olilenmin, $olilen,
2018             $laptemp, $laplenmin, $tmtol, $poolsize,
2019             $poolnummax, $verbose
2020             );
2021             };
2022 0         0 my $e;
2023 0 0       0 if ($e = Bio::GeneDesign::Exception::UnOLable->caught())
2024             {
2025 0         0 print "Cannot chop into oligos: " . $e->error . "\n";
2026             }
2027 0         0 return $OL_list;
2028             }
2029              
2030             =head2 make_graph
2031              
2032             =cut
2033              
2034             sub make_graph
2035             {
2036 0     0 1 0 my ($self, @args) = @_;
2037            
2038 0 0       0 $self->throw("Graphing is not available")
2039             unless $self->{graph};
2040            
2041 0         0 my ($seqobjs, $window)
2042             = $self->_rearrange([qw(sequences window)], @args);
2043              
2044 0 0       0 $self->throw("no codon table has been defined")
2045             unless $self->{codontable};
2046            
2047 0 0       0 $self->throw("no RSCU table has been defined")
2048             unless $self->{rscutable};
2049              
2050 0 0       0 $self->throw("no nucleotide sequences provided")
2051             unless $seqobjs;
2052              
2053 0 0       0 $self->throw("sequences argument is not an array reference")
2054             unless ref($seqobjs) eq "ARRAY";
2055            
2056 0         0 foreach my $seqobj (@$seqobjs)
2057             {
2058 0 0       0 $self->throw(ref($seqobj) . " is not a Bio::Seq object")
2059             unless $seqobj->isa("Bio::Seq");
2060              
2061 0 0       0 $self->throw("$seqobj is not a nucleotide sequence")
2062             unless $seqobj->alphabet eq "dna";
2063             }
2064            
2065 0         0 my ($graph, $format) = _make_graph( $seqobjs,
2066             $window,
2067             $self->{organism},
2068             $self->{codontable},
2069             $self->{rscutable},
2070             $self->{reversecodontable});
2071 0         0 return ($graph, $format);
2072             }
2073              
2074             =head2 make_dotplot
2075              
2076             =cut
2077              
2078             sub make_dotplot
2079             {
2080 0     0 1 0 my ($self, @args) = @_;
2081            
2082 0 0       0 $self->throw("Graphing is not available")
2083             unless $self->{graph};
2084            
2085 0         0 my ($seq1, $seq2, $window, $stringency)
2086             = $self->_rearrange([qw(first second window stringency)], @args);
2087              
2088 0   0     0 $window = $window || 10;
2089 0   0     0 $stringency = $stringency || 10;
2090            
2091 0 0 0     0 $self->throw("no nucleotide sequences provided")
2092             unless ($seq1 && $seq2);
2093            
2094 0         0 foreach my $seqobj ($seq1, $seq2)
2095             {
2096 0 0       0 $self->throw(ref($seqobj) . " is not a Bio::Seq object")
2097             unless $seqobj->isa("Bio::Seq");
2098              
2099 0 0       0 $self->throw("$seqobj is not a nucleotide sequence")
2100             unless $seqobj->alphabet eq "dna";
2101             }
2102            
2103 0         0 my $graph = _dotplot(
2104             $seq1->seq,
2105             $seq2->seq,
2106             $window,
2107             $stringency
2108             );
2109 0         0 return $graph;
2110             }
2111              
2112             =head2 import_seqs
2113              
2114             NO TEST
2115              
2116             =cut
2117              
2118             sub import_seqs
2119             {
2120 1     1 1 406 my ($self, $path) = @_;
2121 1 50       22 $self->throw("$path does not exist.")
2122             if (! -e $path);
2123 1   33     11 my $iterator = Bio::SeqIO->new(-file => $path)
2124             || $self->throw("Can't parse $path!");
2125            
2126 1         7655 my ($filename, $dirs, $suffix) = fileparse($path, qr/\.[^.]*/x);
2127 1 50       11 $suffix = substr($suffix, 1) if (substr($suffix, 0, 1) eq ".");
2128 1 50       7 $suffix = 'fasta' if ($suffix eq 'fa');
2129            
2130 1         6 return ($iterator, $filename, $suffix);
2131             }
2132              
2133             =head2 export_seqs
2134              
2135             NO TEST
2136              
2137             =cut
2138              
2139             sub export_seqs
2140             {
2141 0     0 1 0 my ($self, @args) = @_;
2142            
2143 0         0 my ($filename, $outpath, $outformat, $seqarr)
2144             = $self->_rearrange([qw(
2145             filename
2146             path
2147             format
2148             sequences)], @args);
2149              
2150 0 0       0 $outpath = $outpath ? $outpath : ".";
2151 0 0       0 $outpath .= "/" if (substr($outpath, -1, 1) !~ /[ \/ ]/x);
2152 0 0       0 $self->throw("$outpath does not exist") if (! -e $outpath);
2153              
2154 0 0       0 $outformat = $outformat ? $outformat : "genbank";
2155 0         0 my $module = "Bio::SeqIO::$outformat";
2156 0         0 (my $require_name = $module . ".pm") =~ s{::}{/}xg;
2157             my $flag = eval
2158 0         0 {
2159 0         0 require $require_name;
2160             };
2161 0 0 0     0 $self->throw("$outformat is not a format recognized by BioPerl")
2162             unless ($outformat && $flag);
2163            
2164             #Long attributes that come in from a genbank file will have corruption
2165             #remove spaces and reattribute to fix bbs in genbank file ):
2166 0 0       0 if ($outformat eq 'genbank')
2167             {
2168 0         0 foreach my $seq (@{$seqarr})
  0         0  
2169             {
2170 0         0 foreach my $feat ($seq->get_SeqFeatures)
2171             {
2172 0         0 foreach my $tag ($feat->get_all_tags())
2173             {
2174 0         0 my $value = join(q{}, $feat->get_tag_values($tag));
2175 0         0 $value =~ s/\s//xg;
2176 0         0 $feat->remove_tag($tag);
2177 0         0 $feat->add_tag_value($tag, $value);
2178             }
2179             }
2180             }
2181             }
2182 0         0 my $path = $outpath . $filename;
2183 0 0       0 open (my $OUTFH, '>', $path ) || $self->throw("Can't write to $path? $!");
2184 0         0 my $FOUT = Bio::SeqIO->new(-fh => $OUTFH, -format => $outformat);
2185 0         0 $FOUT->write_seq($_) foreach (@$seqarr);
2186 0         0 close $OUTFH;
2187 0         0 return;
2188             }
2189              
2190             =head2 random_dna
2191              
2192             =cut
2193              
2194             sub random_dna
2195             {
2196 30000     30000 1 252220 my ($self, @args) = @_;
2197            
2198 30000         124475 my ($rlen, $rgc, $rstop)
2199             = $self->_rearrange([qw(
2200             length
2201             gc_percentage
2202             no_stops)], @args);
2203              
2204 30000 50 33     735445 $self->throw("no codon table has been defined")
2205             if ($rstop && ! $self->{codontable});
2206              
2207 30000   100     73576 $rgc = $rgc || 50;
2208 30000 50 33     159628 $self->throw("gc_percentage must be between 0 and 100")
      33        
2209             if ($rgc && ($rgc < 0 || $rgc > 100));
2210              
2211 30000 50 33     145810 if (! $rlen || $rlen < 1)
    50          
2212             {
2213 0         0 return q{};
2214             }
2215             elsif ($rlen == 1)
2216             {
2217 30000 50       103832 return $rgc ? _randombase_weighted($rgc) : _randombase;
2218             }
2219 0         0 return _randomDNA($rlen, $rgc, $rstop, $self->{codontable});
2220             }
2221              
2222             =head2 replace_ambiguous_bases
2223              
2224             =cut
2225              
2226             sub replace_ambiguous_bases
2227             {
2228 3     3 1 18 my ($self, $seq) = @_;
2229            
2230 3 50       8 $self->throw("no sequence provided ")
2231             unless ($seq);
2232              
2233 3         8 my $str = $self->_stripdown($seq, q{}, 1);
2234              
2235 3         13 my $newstr = _replace_ambiguous_bases($str);
2236            
2237 3 50       9 if (ref $seq)
2238             {
2239 0         0 my $newobj = $seq->clone();
2240 0 0       0 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
2241 0         0 $desc .= "deambiguated";
2242 0         0 $newobj->seq($newstr);
2243 0         0 $newobj->desc($desc);
2244 0         0 return $newobj;
2245             }
2246             else
2247             {
2248 3         10 return $newstr;
2249             }
2250             }
2251              
2252             =head1 PLEASANTRIES
2253              
2254             =head2 pad
2255              
2256             my $name = 5;
2257             my $nice = $GD->pad($name, 3);
2258             $nice == "005" || die;
2259              
2260             $name = "oligo";
2261             $nice = $GD->pad($name, 7, "_");
2262             $nice == "__oligo" || die;
2263            
2264             Pads an integer with leading zeroes (by default) or any provided set of
2265             characters. This is useful both to make reports pretty and to standardize the
2266             length of designations.
2267              
2268             =cut
2269              
2270             sub pad
2271             {
2272 0     0 1 0 my ($self, $num, $thickness, $chars) = @_;
2273 0         0 my $t = $num;
2274 0   0     0 $chars = $chars || "0";
2275 0         0 $t = $chars . $t while (length($t) < $thickness);
2276 0         0 return $t;
2277             }
2278              
2279             =head2 attitude
2280              
2281             my $adverb = $GD->attitude();
2282            
2283             Ask GeneDesign how it handled your request.
2284            
2285             =cut
2286              
2287             sub attitude
2288             {
2289 0     0 1 0 my @adverbs = qw(Elegantly Energetically Enthusiastically Excitedly Daintily
2290             Deliberately Diligently Dreamily Courageously Cooly Cleverly Cheerfully
2291             Carefully Calmly Briskly Blindly Bashfully Absentmindedly Awkwardly
2292             Faithfully Ferociously Fervently Fiercely Fondly Gently Gleefully Gratefully
2293             Gracefully Happily Helpfully Heroically Honestly Joyfully Jubilantly
2294             Jovially Keenly Kindly Knowingly Kookily Loftily Lovingly Loyally
2295             Majestically Mechanically Merrily Mostly Neatly Nicely Obediently Officially
2296             Optimistically Patiently Perfectly Playfully Positively Powerfully
2297             Punctually Properly Promptly Quaintly Quickly Quirkily Rapidly Readily
2298             Reassuringly Righteously Sedately Seriously Sharply Shyly Silently Smoothly
2299             Solemnly Speedily Strictly Successfully Suddenly Sweetly Swiftly Tenderly
2300             Thankfully Throroughly Thoughtfully Triumphantly Ultimately Unabashedly
2301             Utterly Upliftingly Urgently Usefully Valiantly Victoriously Vivaciously
2302             Warmly Wholly Wisely Wonderfully Yawningly Zealously Zestfully
2303             );
2304 0         0 my $index = _random_index(scalar(@adverbs));
2305 0         0 return $adverbs[$index];
2306             }
2307              
2308             =head2 endslash
2309              
2310             =cut
2311              
2312             sub endslash
2313             {
2314 0     0 1 0 my ($self, $path) = @_;
2315 0 0       0 if (substr($path, -1, 1) ne q{/})
2316             {
2317 0         0 $path .= q{/};
2318             }
2319 0         0 return $path;
2320             }
2321              
2322             =head2 _stripdown
2323              
2324             =cut
2325              
2326             sub _stripdown
2327             {
2328 40059     40059   62111 my ($self, $seqarg, $type, $enz_allowed) = @_;
2329              
2330 40059   100     121728 $enz_allowed = $enz_allowed || 0;
2331 40059 100       114096 my @seqs = ref($seqarg) eq "ARRAY" ? @$seqarg : ($seqarg);
2332 40059         45371 my @list;
2333 40059         64486 foreach my $seq (@seqs)
2334             {
2335 40059         48241 my $str = $seq;
2336 40059         52050 my $ref = ref($seq);
2337 40059 100       77438 if ($ref)
2338             {
2339 40027         82807 my $bit = $self->_checkref($seq, $enz_allowed);
2340 40027 50       92721 $self->throw("object $ref is not a compatible object $bit") if ($bit < 1);
2341 40027 50       109572 $str = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
2342             }
2343 40059         1073206 push @list, uc $str;
2344             }
2345 40059 100       102547 return \@list if ($type eq "ARRAY");
2346 40056         116332 return $list[0];
2347             }
2348              
2349             =head2 _checkref
2350              
2351             =cut
2352              
2353             sub _checkref
2354             {
2355 40027     40027   56173 my ($self, $pobj, $enz_allowed) = @_;
2356 40027         50510 my $ref = ref $pobj;
2357 40027 50       76121 return -1 if (! $ref);
2358 40027   100     116398 $enz_allowed = $enz_allowed || 0;
2359 40027         58857 my ($bioseq, $bioseqfeat) = (0, 0);
2360 40027         112163 $bioseq = $pobj->isa("Bio::Seq");
2361 40027         166356 $bioseqfeat = $pobj->isa("Bio::SeqFeatureI");
2362 40027 100       83598 if ($enz_allowed)
2363             {
2364 20016         102778 $enz_allowed = $pobj->isa("Bio::GeneDesign::RestrictionEnzyme");
2365             }
2366 40027         97527 return $bioseq + $bioseqfeat + $enz_allowed;
2367             }
2368              
2369             1;
2370              
2371             __END__
2372              
2373             =head1 COPYRIGHT AND LICENSE
2374              
2375             Copyright (c) 2013, GeneDesign developers
2376             All rights reserved.
2377              
2378             Redistribution and use in source and binary forms, with or without modification,
2379             are permitted provided that the following conditions are met:
2380              
2381             * Redistributions of source code must retain the above copyright notice, this
2382             list of conditions and the following disclaimer.
2383              
2384             * Redistributions in binary form must reproduce the above copyright notice, this
2385             list of conditions and the following disclaimer in the documentation and/or
2386             other materials provided with the distribution.
2387              
2388             * The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
2389             National Laboratory, the Department of Energy, and the GeneDesign developers may
2390             not be used to endorse or promote products derived from this software without
2391             specific prior written permission.
2392              
2393             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
2394             ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
2395             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
2396             DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
2397             INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2398             LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
2399             PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
2400             LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
2401             OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
2402             ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2403              
2404             =cut