File Coverage

blib/lib/Bio/GeneDesign.pm
Criterion Covered Total %
statement 391 629 62.1
branch 117 318 36.7
condition 33 98 33.6
subroutine 53 84 63.1
pod 64 65 98.4
total 658 1194 55.1


line stmt bran cond sub pod time code
1             #
2             # GeneDesign engine
3             #
4              
5             =head1 NAME
6              
7             Bio::GeneDesign
8              
9             =head1 VERSION
10              
11             Version 5.56
12              
13             =head1 DESCRIPTION
14              
15             =head1 AUTHOR
16              
17             Sarah Richardson
18              
19             =cut
20              
21             package Bio::GeneDesign;
22 11     11   973986 use base qw(Bio::Root::Root);
  11         107  
  11         5357  
23              
24 11     11   425477 use Bio::GeneDesign::ConfigData;
  11         24  
  11         398  
25 11     11   4331 use Bio::GeneDesign::Basic qw(:GD);
  11         30  
  11         2317  
26 11     11   4283 use Bio::GeneDesign::IO qw(:GD);
  11         27  
  11         1652  
27 11     11   4590 use Bio::GeneDesign::CodonJuggle qw(:GD);
  11         28  
  11         1465  
28 11     11   76 use Bio::GeneDesign::Codons qw(:GD);
  11         21  
  11         1480  
29 11     11   5574 use Bio::GeneDesign::Oligo qw(:GD);
  11         28  
  11         1162  
30 11     11   72 use Bio::GeneDesign::Random qw(:GD);
  11         16  
  11         997  
31 11     11   4358 use Bio::GeneDesign::RestrictionEnzymes qw(:GD);
  11         27  
  11         1205  
32 11     11   4323 use Bio::GeneDesign::ReverseTranslate qw(:GD);
  11         23  
  11         1058  
33 11     11   4150 use Bio::GeneDesign::PrefixTree;
  11         27  
  11         300  
34 11     11   5826 use Bio::SeqFeature::Generic;
  11         539526  
  11         361  
35 11     11   97 use File::Basename;
  11         26  
  11         694  
36 11     11   67 use Bio::Seq;
  11         22  
  11         164  
37 11     11   50 use Carp;
  11         20  
  11         463  
38              
39 11     11   58 use strict;
  11         20  
  11         190  
40 11     11   52 use warnings;
  11         21  
  11         74640  
41              
42             my $VERSION = 5.56;
43              
44             =head1 CONSTRUCTORS
45              
46             =head2 new
47              
48             Returns an initialized Bio::GeneDesign object.
49              
50             This function reads the ConfigData written at installation, imports the
51             relevant sublibraries, and sets the relevant paths.
52              
53             my $GD = Bio::GeneDesign->new();
54              
55             =cut
56              
57             sub new
58             {
59 9     9 1 995 my ($class, @args) = @_;
60 9         105 my $self = $class->SUPER::new(@args);
61 9         112 bless $self, $class;
62              
63 9         69 $self->{script_path} = Bio::GeneDesign::ConfigData->config('script_path');
64 9         36 $self->{conf} = Bio::GeneDesign::ConfigData->config('conf_path');
65 9 50       52 $self->{conf} .= q{/} unless substr($self->{conf}, -1, 1) eq q{/};
66              
67 9         33 $self->{tmp_path} = Bio::GeneDesign::ConfigData->config('tmp_path');
68 9 50       39 $self->{tmp_path} .= q{/} unless substr($self->{tmp_path}, -1, 1) eq q{/};
69              
70 9         29 $self->{graph} = Bio::GeneDesign::ConfigData->config('graphing_support');
71 9 50       35 if ($self->{graph} > 0)
72             {
73 0         0 require Bio::GeneDesign::Graph;
74 0         0 import Bio::GeneDesign::Graph qw(:GD);
75             }
76              
77 9         28 $self->{EMBOSS} = Bio::GeneDesign::ConfigData->config('EMBOSS_support');
78 9 50       31 if ($self->{EMBOSS})
79             {
80 0         0 require Bio::GeneDesign::Palindrome;
81 0         0 import Bio::GeneDesign::Palindrome qw(:GD);
82             }
83              
84 9         52 $self->{BLAST} = Bio::GeneDesign::ConfigData->config('BLAST_support');
85 9 50       34 if ($self->BLAST)
86             {
87             #$ENV{BLASTPLUSDIR} = Bio::GeneDesign::ConfigData->config('blast_path');
88 0         0 require Bio::GeneDesign::Blast;
89 0         0 import Bio::GeneDesign::Blast qw(:GD);
90             }
91              
92 9         27 $self->{vmatch} = Bio::GeneDesign::ConfigData->config('vmatch_support');
93 9 50       41 if ($self->{vmatch})
94             {
95 0         0 require Bio::GeneDesign::Vmatch;
96 0         0 import Bio::GeneDesign::Vmatch qw(:GD);
97             }
98              
99 9         41 $self->{codon_path} = $self->{conf} . 'codon_tables/';
100 9         23 $self->{organism} = undef;
101 9         18 $self->{codontable} = undef;
102 9         19 $self->{enzyme_set} = undef;
103 9         18 $self->{version} = $VERSION;
104 9         22 $self->{amb_trans_memo} = {};
105              
106 9         25 return $self;
107             }
108              
109             =head1 ACCESSORS
110              
111             =cut
112              
113             =head2 codon_path
114              
115             returns the directory containing codon tables
116              
117             =cut
118              
119             sub codon_path
120             {
121 0     0 1 0 my ($self) = @_;
122 0         0 return $self->{codon_path};
123             }
124              
125             =head2 EMBOSS
126              
127             returns a value if EMBOSS_support was vetted and approved during installation.
128              
129             =cut
130              
131             sub EMBOSS
132             {
133 0     0 1 0 my ($self) = @_;
134 0         0 return $self->{'EMBOSS'};
135             }
136              
137             =head2 BLAST
138              
139             returns a value if BLAST_support was vetted and approved during installation.
140              
141             =cut
142              
143             sub BLAST
144             {
145 9     9 1 22 my ($self) = @_;
146 9         29 return $self->{'BLAST'};
147             }
148              
149             =head2 graph
150              
151             returns a value if graphing_support was vetted and approved during installation.
152              
153             =cut
154              
155             sub graph
156             {
157 0     0 1 0 my ($self) = @_;
158 0         0 return $self->{'graph'};
159             }
160              
161             =head2 vmatch
162              
163             returns a value if vmatch_support was vetted and approved during installation.
164              
165             =cut
166              
167             sub vmatch
168             {
169 0     0 1 0 my ($self) = @_;
170 0         0 return $self->{'vmatch'};
171             }
172              
173             =head2 enzyme_set
174              
175             Returns a hash reference where the keys are enzyme names and the values are
176             L objects, if the enzyme
177             set has been defined.
178              
179             To set this value, use L.
180              
181             =cut
182              
183             sub enzyme_set
184             {
185 4     4 1 853 my ($self) = @_;
186 4         17 return $self->{'enzyme_set'};
187             }
188              
189             =head2 enzyme_set_name
190              
191             Returns the name of the enzyme set in use, if there is one.
192              
193             To set this value, use L.
194              
195             =cut
196              
197             sub enzyme_set_name
198             {
199 0     0 1 0 my ($self) = @_;
200 0         0 return $self->{'enzyme_set_name'};
201             }
202              
203             =head2 all_enzymes
204              
205             Returns a hash reference where the keys are enzyme names and the values are
206             L objects
207              
208             To set this value, use L.
209              
210             =cut
211              
212             sub all_enzymes
213             {
214 0     0 1 0 my ($self) = @_;
215 0         0 return $self->{'all_enzymes'};
216             }
217              
218             =head2 organism
219              
220             Returns the name of the organism in use, if there is one.
221              
222             To set this value, use L.
223              
224             =cut
225              
226             sub organism
227             {
228 0     0 1 0 my ($self) = @_;
229 0         0 return $self->{'organism'};
230             }
231              
232             =head2 codontable
233              
234             Returns the codon table in use, if there is one.
235              
236             The codon table is a hash reference where the keys are upper case nucleotides
237             and the values are upper case single letter amino acids.
238              
239             my $codon_t = $GD->codontable();
240             $codon_t->{"ATG"} eq "M" || die;
241              
242             To set this value, use L.
243              
244             =cut
245              
246             sub codontable
247             {
248 1     1 1 571 my ($self) = @_;
249 1         6 return $self->{'codontable'};
250             }
251              
252             =head2 reversecodontable
253              
254             Returns the reverse codon table in use, if there is one.
255              
256             The reverse codon table is a hash reference where the keys are upper case single
257             letter amino acids and the values are upper case nucleotides.
258              
259             my $revcodon_t = $GD->reversecodontable();
260             $revcodon_t->{"M"} eq "ATG" || die;
261              
262             This value is set automatically when L is run.
263              
264             =cut
265              
266             sub reversecodontable
267             {
268 1     1 1 1209 my ($self) = @_;
269 1         3 return $self->{'reversecodontable'};
270             }
271              
272             =head2 rscutable
273              
274             Returns the RSCU table in use, if there is one.
275              
276             The RSCU codon table is a hash reference where the keys are upper case
277             nucleotides and the values are floats.
278              
279             my $rscu_t = $GD->rscutable();
280             $rscu_t->{"ATG"} eq 1.00 || die;
281              
282             To set this value, use L.
283              
284             =cut
285              
286             sub rscutable
287             {
288 1     1 1 3536 my ($self) = @_;
289 1         5 return $self->{'rscutable'};
290             }
291              
292              
293             =head1 FUNCTIONS
294              
295             =cut
296              
297             =head2 melt
298              
299             my $Tm = $GD->melt(-sequence => $myseq);
300              
301             The -sequence argument is required.
302              
303             Returns the melting temperature of a DNA sequence.
304              
305             You can set the salt and DNA concentrations with the -salt and -concentration
306             arguments; they are 50mm (.05) and 100 pm (.0000001) respectively.
307              
308             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
309             object to be analyzed with the -sequence flag.
310              
311             There are four different formulae to choose from. If you wish to use the nearest
312             neighbor method, use the -nearest_neighbor flag. Otherwise the appropriate
313             formula will be determined by the length of your -sequence argument.
314              
315             For sequences under 14 base pairs:
316             Tm = (4 * #GC) + (2 * #AT).
317              
318             For sequences between 14 and 50 base pairs:
319             Tm = 100.5 + (41 * #GC / length) - (820 / length) + 16.6 * log10(salt)
320              
321             For sequences over 50 base pairs:
322             Tm = 81.5 + (41 * #GC / length) - (500 / length) + 16.6 * log10(salt) - .62;
323              
324             =cut
325              
326             sub melt
327             {
328 4     4 1 5473 my ($self, @args) = @_;
329              
330 4         18 my ($seq, $salt, $conc, $nnflag)
331             = $self->_rearrange([qw(
332             sequence
333             salt
334             concentration
335             nearest_neighbor)], @args);
336              
337 4 50       84 $self->throw("No sequence provided for the melt function")
338             unless ($seq);
339              
340 4 100       10 $nnflag = $nnflag ? 1 : 0;
341 4   50     15 $salt = $salt || .05;
342 4   50     10 $conc = $conc || .0000001;
343 4         9 my $str = $self->_stripdown($seq, q{}, 1);
344              
345 4 100       9 if ($nnflag)
346             {
347 1         4 return _ntherm($str, $salt, $conc);
348             }
349 3         11 return _melt($str, $salt, $conc);
350             }
351              
352             =head2 complement
353              
354             $my_seq = "AATTCG";
355              
356             my $complemented_seq = $GD->complement($my_seq);
357             $complemented_seq eq "TTAAGC" || die;
358              
359             my $reverse_complemented_seq = $GD->complement($my_seq, 1);
360             $reverse_complemented_seq eq "CGAATT" || die;
361              
362             #clean
363             my $complemented_seq = $GD->complement(-sequence => $my_seq);
364             $complemented_seq eq "TTAAGC" || die;
365              
366             my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq,
367             -reverse => 1);
368             $reverse_complemented_seq eq "CGAATT" || die;
369              
370              
371             The -sequence argument is required.
372              
373             Complements or reverse complements a DNA sequence.
374              
375             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
376             object to be processed.
377              
378             If you also pass along a true statement, the sequence will be reversed and
379             complemented.
380              
381             =cut
382              
383             sub complement
384             {
385 4     4 1 4830 my ($self, @args) = @_;
386              
387 4         16 my ($seq, $swit)
388             = $self->_rearrange([qw(
389             sequence
390             reverse)], @args);
391              
392 4 50       31 $self->throw("No sequence provided for the complement function")
393             unless $seq;
394              
395 4   100     13 $swit = $swit || 0;
396              
397 4         7 my $str = $self->_stripdown($seq, q{}, 1);
398              
399 4         12 return _complement($str, $swit);
400             }
401              
402             =head2 rcomplement
403              
404             Sugar time!
405              
406             $my_seq = "AATTCG";
407              
408             my $reverse_complemented_seq = $GD->rcomplement($my_seq);
409             $reverse_complemented_seq eq "CGAATT" || die;
410              
411             #clean
412              
413             my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq,
414             -reverse => 1);
415             $reverse_complemented_seq eq "CGAATT" || die;
416              
417              
418             The -sequence argument is required.
419              
420             Reverse complements a DNA sequence.
421              
422             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
423             object to be processed.
424              
425             =cut
426              
427             sub rcomplement
428             {
429 3     3 1 4219 my ($self, @args) = @_;
430              
431 3         11 my ($seq) = $self->_rearrange([qw(sequence)], @args);
432              
433 3 50       22 $self->throw('No sequence provided for the rcomplement function')
434             unless $seq;
435              
436 3         8 my $str = $self->_stripdown($seq, q{}, 1);
437              
438 3         9 return _complement($str, 1);
439             }
440              
441             =head2 transcribe
442              
443             $my_seq = "AATTCG";
444              
445             my $RNA_seq = $GD->transcribe($my_seq);
446             $complemented_seq eq "AAUUCG" || die;
447              
448             The -sequence argument is required.
449              
450             Transcribes an RNA sequence from a DNA sequence.
451              
452             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
453             object to be processed.
454              
455             =cut
456              
457             sub transcribe
458             {
459 0     0 1 0 my ($self, @args) = @_;
460              
461 0         0 my ($seq) = $self->_rearrange([qw(sequence)], @args);
462              
463 0 0       0 $self->throw("No sequence provided for the transcribe function")
464             unless $seq;
465              
466 0         0 my $str = $self->_stripdown($seq, q{}, 1);
467              
468 0         0 return _toRNA($str);
469             }
470              
471             =head2 count
472              
473             $my_seq = "AATTCG";
474             my $count = $GD->count($my_seq);
475             $count->{C} == 1 || die;
476             $count->{G} == 1 || die;
477             $count->{A} == 2 || die;
478             $count->{GCp} == 33.3 || die;
479             $count->{ATp} == 66.7 || die;
480              
481             #clean
482             my $count = $GD->count(-sequence => $my_seq);
483              
484             You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
485             object.
486              
487             the count function counts the bases in a DNA sequence and returns a hash
488             reference where each base (including the ambiguous bases) are keys and the
489             values are the number of times they appear in the sequence. There are also the
490             special values GCp and ATp for GC and AT percentage.
491              
492             =cut
493              
494             sub count
495             {
496 2     2 1 3776 my ($self, @args) = @_;
497              
498 2         9 my ($seq) = $self->_rearrange([qw(sequence)], @args);
499              
500 2 50       18 $self->throw("No sequence provided for the count function")
501             unless ($seq);
502              
503              
504 2         6 my $str = $self->_stripdown($seq, q{}, 1);
505              
506 2         9 return _count($str);
507             }
508              
509              
510             =head2 GC_windows
511              
512             takes a nucleotide sequence, a window size, and minimum and maximum values.
513             returns lists of real coordinates of subsequences that violate mimimum or
514             maximum GC percentages.
515              
516             Values are returned inside an array reference such that the first value is an
517             array ref of minimum violators (as array refs of left/right coordinates), and
518             the second value is an array ref of maximum violators.
519              
520             $return_value = [
521             [[left, right], [left, right]], #minimum violators
522             [[left, right], [left, right]] #maximum violators
523             ];
524              
525             =cut
526              
527             sub GC_windows
528             {
529 0     0 1 0 my ($self, @args) = @_;
530              
531 0         0 my ($seq, $win, $min, $max) = $self->_rearrange([qw(
532             sequence window minimum maximum)], @args);
533              
534 0 0       0 $self->throw("No sequence provided for the GC_windows function")
535             unless ($seq);
536              
537 0         0 my $str = $self->_stripdown($seq, q{}, 1);
538              
539 0         0 return _gcwindows($str, $win, $min, $max);
540             }
541              
542             =head2 regex_nt
543              
544             my $my_seq = "ABC";
545             my $regex = $GD->regex_nt(-sequence => $my_seq);
546             # $regex is qr/A[CGT]C/;
547              
548             my $regarr = $GD->regex_nt(-sequence => $my_seq --reverse_complement => 1);
549             # $regarr is [qr/A[CGT]C/, qr/G[ACG]T/]
550              
551              
552             You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
553             object to be processed with the -sequence flag.
554              
555             regex_nt creates a compiled regular expression or a set of them that can be used
556             to query large nucleotide sequences for possibly ambiguous subsequences.
557              
558              
559             If you want to get regular expressions for both the forward and reverse senses
560             of the DNA, use the -reverse_complement flag and expect a reference to an array
561             of compiled regexes.
562              
563             =cut
564              
565             sub regex_nt
566             {
567 6     6 1 12702 my ($self, @args) = @_;
568              
569 6         29 my ($seq, $arrswit)
570             = $self->_rearrange([qw(
571             sequence
572             reverse_complement)], @args
573             );
574              
575 6 50       97 $self->throw("no sequence provided the regex_nt function")
576             unless ($seq);
577              
578 6         16 my $str = $self->_stripdown($seq, q{}, 1);
579              
580 6 100       14 if ($arrswit)
581             {
582 2         6 return _regarr($str);
583             }
584             else
585             {
586 4         14 return _regres($str, 1);
587             }
588             }
589              
590             =head2 regex_aa
591              
592             my $my_pep = "AEQ*";
593             my $regex = $GD->regex_aa(-sequence => $my_pep);
594             $regex == qr/AEQ[\*]/ || die;
595              
596             Creates a compiled regular expression or a set of them that can be used to query
597             large amino acid sequences for smaller subsequences.
598              
599             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
600             object to be processed with the -sequence flag.
601              
602             =cut
603              
604             sub regex_aa
605             {
606 1     1 1 632 my ($self, $seq) = @_;
607              
608 1 50       4 $self->throw("no sequence provided for the regex_aa function")
609             unless ($seq);
610              
611 1         4 my $str = $self->_stripdown($seq, q{}, 1);
612              
613 1         5 return _regres($str, 2);
614             }
615              
616             =head2 sequence_is_ambiguous
617              
618             my $my_seq = "ABC";
619             my $flag = $GD->sequence_is_ambiguous($my_seq);
620             $flag == 1 || die;
621              
622             $my_seq = "ATC";
623             $flag = $GD->sequence_is_ambiguous($my_seq);
624             $flag == 0 || die;
625              
626             Checks to see if a DNA sequence contains ambiguous bases (RYMKWSBDHVN) and
627             returns true if it does.
628              
629             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
630             object to be processed.
631              
632             =cut
633              
634             sub sequence_is_ambiguous
635             {
636 5     5 1 2547 my ($self, $seq) = @_;
637              
638 5 50       13 $self->throw("no sequence provided for the sequence_is_ambiguous function")
639             unless ($seq);
640              
641 5         13 my $str = $self->_stripdown($seq, q{}, 1);
642              
643 5         17 return _is_ambiguous($str);
644             }
645              
646             =head2 ambiguous_translation
647              
648             my $my_seq = "ABC";
649             my @peps = $GD->ambiguous_translation(-sequence => $my_seq, -frame => 1);
650             # @peps is qw(I T C)
651              
652             You must pass a string variable, a Bio::Seq object, or a Bio::SeqFeatureI object
653             to be processed.
654              
655             Translates a nucleotide sequence that may have ambiguous bases and returns an
656             array of possible peptides.
657              
658             The frame argument may be 1, 2, 3, -1, -2, or -3.
659             It may also be t (three, 1, 2, 3), or s (six, 1, 2, 3, -1, -2, -3).
660             It defaults to 1.
661              
662             =cut
663              
664             sub ambiguous_translation
665             {
666 4     4 1 10776 my ($self, @args) = @_;
667              
668 4         18 my ($seq, $frame)
669             = $self->_rearrange([qw(sequence frame)], @args);
670              
671 4 50       124 $self->throw("no sequence provided for the ambiguous_translation function")
672             unless ($seq);
673              
674 4   50     11 $frame = $frame || 1;
675              
676 4         8 my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t);
  32         60  
677              
678             $self->throw("Bad frame argument to ambiguous_translation")
679 4 50       14 unless (exists $ambtransswits{$frame});
680              
681 4         14 my $str = $self->_stripdown($seq, q{}, 1);
682              
683             return _amb_translation($str,
684             $self->{codontable},
685             $frame,
686 4         19 $self->{amb_trans_memo});
687             }
688              
689             =head2 ambiguous_transcription
690              
691             my $my_seq = "ABC";
692             my $seqs = $GD->ambiguous_transcription($my_seq);
693             # $seqs is [qw(ACC AGC ATC)]
694              
695             Deambiguates a nucleotide sequence that may have ambiguous bases and returns a
696             reference to a sorted array of possible unambiguous sequences.
697              
698             You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
699             object to be processed.
700              
701             =cut
702              
703             sub ambiguous_transcription
704             {
705 1     1 1 2919 my ($self, $seq) = @_;
706              
707 1 50       4 $self->throw("no sequence provided for the ambiguous_transcription function")
708             unless ($seq);
709              
710 1         4 my $str = $self->_stripdown($seq, q{}, 1);
711              
712 1         5 return _amb_transcription($str);
713             }
714              
715             =head2 positions
716              
717             my $seq = "TGCTGACTGCAGTCAGTACACTACGTACGTGCATGAC";
718             my $seek = "CWC";
719              
720             my $positions = $GD->positions(-sequence => $seq,
721             -query => $seek);
722             # $positions is {18 => "CAC"}
723              
724             $positions = $GD->positions(-sequence => $seq,
725             -query => $seek,
726             -reverse_complement => 1);
727             # $positions is {18 => "CAC", 28 => "GTG"}
728              
729             Finds and returns all the positions and sequences of a potentially ambiguous
730             subsequence in a larger sequence. The reverse_complement flag is off by default.
731              
732             You can pass either string variables, Bio::Seq objects, or Bio::SeqFeatureI
733             objects as the sequence and query arguments; additionally you may pass a
734             L object as the query
735             argument.
736              
737             =cut
738              
739             sub positions
740             {
741 2     2 1 3686 my ($self, @args) = @_;
742              
743 2         9 my ($seq, $seek, $revcom)
744             = $self->_rearrange([qw(
745             sequence
746             query
747             reverse_complement)], @args);
748              
749 2 50       48 $self->throw("no sequence provided for the positions function")
750             unless ($seq);
751              
752 2 50       5 $self->throw("no query provided for the positions function")
753             unless ($seek);
754              
755              
756 2         4 my $base = $self->_stripdown($seq, q{}, 1);
757              
758 2         6 my $regarr = [];
759 2         3 my $query = $seek;
760 2         3 my $qref = ref($seek);
761 2 50       4 if ($qref)
762             {
763 0 0 0     0 $self->throw("object $qref is not a Bio::Seq or Bio::SeqFeature, or " .
      0        
764             "Bio::GeneDesign::RestrictionEnzyme object")
765             unless ($seek->isa("Bio::Seq")
766             || $seek->isa("Bio::SeqFeatureI")
767             || $seek->isa("Bio::GeneDesign::RestrictionEnzyme"));
768 0 0       0 if ($seek->isa("Bio::GeneDesign::RestrictionEnzyme"))
769             {
770 0         0 $regarr = $seek->regex;
771             }
772             else
773             {
774 0 0       0 $query = ref($seek->seq) ? $seek->seq->seq : $seek->seq;
775 0 0       0 $regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
776             }
777             }
778             else
779             {
780 2 50       9 $regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
781             }
782              
783 2         37 return _positions($base, $regarr);
784             }
785              
786             =head2 parse_organisms
787              
788             Returns two hash references. The first contains the names of all rscu tables.
789             The second contains the name of all codon tables.
790              
791             =cut
792              
793             sub parse_organisms
794             {
795 0     0 1 0 my ($self) = @_;
796 0         0 my ($rscu, $cods) = _parse_organisms($self->{codon_path});
797 0         0 return ($rscu, $cods);
798             }
799              
800             =head2 set_codontable
801              
802             # load a codon table from the GeneDesign configuration directory
803             $GD->set_codontable(-organism_name => "yeast");
804              
805             # load a codon table from an arbitrary path and catch it in a variable
806             my $codon_t = $GD->set_codontable(-organism_name => "custom",
807             -table_path => "/path/to/table.ct");
808              
809             The -organism_name argument is required.
810              
811             This function loads, sets, and returns a codon definition table. After it is run
812             the accessor L will return the hash reference that
813             represents the codon table.
814              
815             If no path is provided, the configuration directory /codon_tables is checked for
816             tables that match the provided organism name. Any codon table that is using a
817             non standard definition for a codon will cause a warning to be issued.
818              
819             The table format for codon tables is
820              
821             # Standard genetic code
822             {TTT} = F
823             {TTC} = F
824             {TTA} = L
825             ...
826              
827             See L
828              
829             =cut
830              
831             sub set_codontable
832             {
833 7     7 1 25 my ($self, @args) = @_;
834 7         27 my ($orgname, $table_path)
835             = $self->_rearrange([qw(
836             organism_name
837             table_path)], @args);
838              
839 7 50       189 $self->throw("No organsim name provided") unless ($orgname);
840 7         17 $self->{organism} = $orgname;
841              
842 7 50 33     156 $self->throw("$table_path does not exist")
843             if ($table_path && ! -e $table_path);
844              
845 7 50       33 if (! $table_path )
846             {
847 0         0 $table_path = $self->{codon_path} . $orgname . ".ct";
848 0 0 0     0 if (-e $table_path && $orgname ne 'Standard')
849             {
850 0         0 warn "Using nonstandard codon definitions for $orgname\n";
851             }
852             else
853             {
854 0         0 $table_path = $self->{codon_path} . "Standard.ct";
855             }
856             }
857              
858 7         40 $self->{codontable} = _parse_codon_file($table_path);
859 7         35 $self->{reversecodontable} = _reverse_codon_table($self->{codontable});
860 7         19 return $self->{codontable};
861             }
862              
863             =head2 set_rscutable
864              
865             # load a RSCU table from the GeneDesign configuration directory
866             $GD->set_rscutable(-organism_name => "yeast");
867              
868             # load an RSCU table from an arbitrary path and catch it in a variable
869             my $rscu_t = $GD->set_rscutable(-organism_name => "custom",
870             -table_path => "/path/to/table.rscu");
871              
872             The -organism_name argument is required.
873              
874             This function loads, sets, and returns an RSCU table. After it is run
875             the accessor L will return the hash reference that
876             represents the RSCU table.
877              
878             If no path is provided, the configuration directory /codon_tables is checked for
879             tables that match the provided organism name. If there is no table in that
880             directory, a warning will appear and the flat RSCU table will be used.
881              
882             Any RSCU table that is missing a definition for a codon will cause a warning to
883             be issued. The table format for RSCU tables is
884              
885             # Saccharomyces cerevisiae (Highly expressed genes)
886             # Nucleic Acids Res 16, 8207-8211 (1988)
887             {TTT} = 0.19
888             {TTC} = 1.81
889             {TTA} = 0.49
890             ...
891              
892             See L.
893              
894             =cut
895              
896             sub set_rscutable
897             {
898 7     7 1 24 my ($self, @args) = @_;
899 7         44 my ($orgname, $rscu_path)
900             = $self->_rearrange([qw(
901             organism_name
902             rscu_path)], @args);
903              
904 7 50       216 $self->throw("No organsim name provided") unless ($orgname);
905 7         20 $self->{organism} = $orgname;
906              
907 7 50 33     154 $self->throw("$rscu_path does not exist")
908             if ($rscu_path && ! -e $rscu_path);
909              
910 7 50       33 if (! $rscu_path)
911             {
912 0         0 $rscu_path = $self->{codon_path} . $orgname . ".rscu";
913 0 0       0 if (! -e $rscu_path)
914             {
915 0         0 warn "No RSCU table for $orgname found. Using Unbiased values\n";
916 0         0 $rscu_path = $self->{codon_path} . "Unbiased.rscu";
917             }
918             }
919              
920 7         30 $self->{rscutable} = _parse_codon_file($rscu_path);
921 7         22 return $self->{rscutable};
922             }
923              
924             =head2 set_organism
925              
926             # load both codon tables and RSCU tables simultaneously
927             $GD->set_organism(-organism_name => "yeast");
928              
929             # with arguments
930             $GD->set_organism(-organism_name => "custom",
931             -table_path => "/path/to/table.ct",
932             -rscu_path => "/path/to/table.rscu");
933              
934              
935             The -organism_name argument is required.
936              
937             This function is just a shortcut; it runs L and
938             L. See those functions for details.
939              
940             =cut
941              
942             sub set_organism
943             {
944 7     7 1 60 my ($self, @args) = @_;
945              
946 7         67 my ($orgname, $table_path, $rscu_path)
947             = $self->_rearrange([qw(
948             organism_name
949             table_path
950             rscu_path)], @args);
951              
952 7 50       275 $self->throw("No organsim name provided") unless ($orgname);
953 7         17 $self->{organism} = $orgname;
954              
955 7         36 $self->set_codontable(-organism_name => $orgname, -table_path => $table_path);
956 7         43 $self->set_rscutable(-organism_name => $orgname, -rscu_path=> $rscu_path);
957              
958 7         26 return;
959             }
960              
961             =head2 codon_count
962              
963             # count the codons in a list of sequences
964             my $tally = $GD->codon_count(-input => \@sequences);
965              
966             # add a gene to an existing codon count
967             $tally = $GD->codon_count(-input => $sequence,
968             -count => $tally);
969              
970             # add a list of Bio::Seq objects to an existing codon count
971             $tally = $GD->codon_count(-input => \@seqobjects,
972             -count => $tally);
973              
974             The -input argument is required and will take a string variable, a Bio::Seq
975             object, a Bio::SeqFeatureI object, or a reference to an array full of any
976             combination of those things.
977              
978             The codon_count function takes a set of sequences and counts how often each
979             codon appears in them. It returns a hash reference where the keys are upper case
980             nucleotide codons and the values are integers. If you pass a hash reference
981             containing codon counts with the -count argument, new counts will be added to
982             the old values.
983              
984             This function will warn you if non nucleotide codons are found.
985              
986             TODO: what about ambiguous codons?
987              
988             =cut
989              
990             sub codon_count
991             {
992 3     3 1 6186 my ($self, @args) = @_;
993              
994 3         17 my ($input, $count) = $self->_rearrange([qw(input count)], @args);
995              
996             $self->throw("no codon table has been defined")
997 3 50       55 unless $self->{codontable};
998              
999 3 50       9 $self->throw("no sequences were provided to count")
1000             unless $input;
1001              
1002 3         8 my $list = $self->_stripdown($input, 'ARRAY', 0);
1003              
1004             my $cod_count = _codon_count($list,
1005             $self->{codontable},
1006 3         13 $count);
1007              
1008 3 50       9 warn("There are bad codons in codon count\n") if (exists $cod_count->{XXX});
1009              
1010 3         12 return $cod_count;
1011             }
1012              
1013             =head2 generate_RSCU_table
1014              
1015             my $rscu_t = $GD->generate_RSCU_table(-sequences => \@list_of_sequences);
1016              
1017             The -sequences argument is required and will take a string variable, a Bio::Seq
1018             object, a Bio::SeqFeatureI object, or a reference to an array full of any
1019             combination of those things.
1020              
1021             The generate_RSCU_table function takes a set of sequences, counts how often each
1022             codon appears, and returns an RSCU table as a hash reference where the keys are
1023             upper case nucleotide codons and the values are floats.
1024              
1025             See L.
1026              
1027             =cut
1028              
1029             sub generate_RSCU_table
1030             {
1031 1     1 1 2683 my ($self, @args) = @_;
1032              
1033 1         8 my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
1034              
1035             $self->throw("no codon table has been defined")
1036 1 50       13 unless $self->{codontable};
1037              
1038 1 50       4 $self->throw("Sequence set must be provided")
1039             unless ($seqobjs);
1040              
1041             return _generate_RSCU_table(
1042             $self->codon_count(-input => $seqobjs),
1043             $self->{codontable},
1044             $self->{reversecodontable}
1045 1         5 );
1046             }
1047              
1048             =head2 generate_codon_report
1049              
1050             my $report = $GD->generate_codon_report(-sequences => \@list_of_sequences);
1051              
1052             The report will have the format
1053              
1054             TTT (F) 12800 0.74
1055             TTC (F) 21837 1.26
1056             TTA (L) 4859 0.31
1057             TTG (L) 18806 1.22
1058              
1059             where the first column in each group is the codon, the second column is the one
1060             letter amino acid abbreviation in parentheses, the third column is the number of
1061             times that codon has been seen, and the fourth column is the RSCU value for that
1062             codon.
1063              
1064             This report comes in a 4x4 layout, as would a standard genetic code table in a
1065             textbook.
1066              
1067             NO TEST
1068              
1069             =cut
1070              
1071             sub generate_codon_report
1072             {
1073 0     0 1 0 my ($self, @args) = @_;
1074              
1075 0         0 my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
1076              
1077             $self->throw("no codon table has been defined")
1078 0 0       0 unless $self->{codontable};
1079              
1080 0 0       0 $self->throw("Sequence set must be provided")
1081             unless ($seqobjs);
1082              
1083 0         0 my $count_t = $self->codon_count(-input => $seqobjs);
1084              
1085             my $rscu_t = _generate_RSCU_table(
1086             $count_t,
1087             $self->{codontable},
1088             $self->{reversecodontable}
1089 0         0 );
1090              
1091             my $report = _generate_codon_report(
1092             $count_t,
1093             $self->{codontable},
1094 0         0 $rscu_t
1095             );
1096              
1097 0         0 return $report;
1098             }
1099              
1100             =head2 generate_RSCU_file
1101              
1102             my $contents = $GD->generate_RSCU_file(
1103             -sequences => \@seqs,
1104             -comments => ["Got these codons from mice"]
1105             );
1106             open (my $OUT, '>', '/path/to/cods') || die "can't write to /path/to/cods";
1107             print $OUT $contents;
1108             close $OUT;
1109              
1110             This function generates a string that can be written to file to serve as a
1111             GeneDesign RSCU table. Provide a set of sequences and an optional array
1112             reference of comments to prepend to the file.
1113              
1114             The file will have the format
1115             # Comment 1
1116             # ...
1117             # Comment n
1118             {TTT} = 0.19
1119             {TTC} = 1.81
1120             ...
1121              
1122             NO TEST
1123              
1124             =cut
1125              
1126             sub generate_RSCU_file
1127             {
1128 0     0 1 0 my ($self, @args) = @_;
1129              
1130 0         0 my ($seqobjs, $comments) = $self->_rearrange([qw(sequences comments)], @args);
1131              
1132             $self->throw('No codon table has been defined')
1133 0 0       0 unless $self->{codontable};
1134              
1135 0 0       0 $self->throw('Sequence set must be provided')
1136             unless ($seqobjs);
1137              
1138 0 0       0 $self->throw('Comment argument must be array reference')
1139             unless ref($comments) eq 'ARRAY';
1140              
1141 0         0 my $count_t = $self->codon_count(-input => $seqobjs);
1142              
1143             my $rscu_t = _generate_RSCU_table(
1144             $count_t,
1145             $self->{codontable},
1146             $self->{reversecodontable}
1147 0         0 );
1148              
1149             my $report = _generate_codon_file(
1150             $rscu_t,
1151             $self->{reversecodontable},
1152 0         0 $comments
1153             );
1154              
1155 0         0 return $report;
1156             }
1157              
1158             =head2 list_enzyme_sets
1159              
1160             my @available_enzlists = $GD->list_enzyme_sets();
1161             # @available_enzlists == ('standard_and_IIB', 'blunts', 'IIB', 'nonpal', ...)
1162              
1163             Returns an array containing the names of every restriction enzyme recognition
1164             list GeneDesign knows about.
1165              
1166             =cut
1167              
1168             sub list_enzyme_sets
1169             {
1170 0     0 1 0 my ($self) = @_;
1171 0         0 my $epath = $self->{conf} . 'enzymes/';
1172 0         0 my @sets = ();
1173 0 0       0 opendir (my $ENZDIR, $epath) || $self->throw("can't opendir $epath");
1174 0         0 foreach my $list (readdir($ENZDIR))
1175             {
1176 0         0 my $name = basename($list);
1177 0         0 $name =~ s{\.[^.]+$}{}x;
1178 0 0 0     0 next if ($name eq q{.} || $name eq q{..});
1179 0         0 push @sets, $name;
1180             }
1181 0         0 closedir($ENZDIR);
1182 0         0 return @sets;
1183             }
1184              
1185             =head2 set_restriction_enzymes
1186              
1187             $GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1188              
1189             or
1190              
1191             $GD->set_restriction_enzymes(-list_path => '/path/to/enzyme_file');
1192              
1193             or even
1194              
1195             $GD->set_restriction_enzymes(
1196             -list_path => '/path/to/enzyme_file',
1197             -enzyme_set => 'custom_enzymes'
1198             );
1199              
1200             All will return a hash structure full of restriction enzymes.
1201              
1202             Tell GeneDesign which set of restriction enzymes to use. If you provide only a
1203             set name with the -enzyme_set flag, GeneDesign will check its config path for a
1204             matching file. Otherwise you must provide a path to a file (and optionally a
1205             name for the set).
1206              
1207             =cut
1208              
1209             sub set_restriction_enzymes
1210             {
1211 3     3 1 530 my ($self, @args) = @_;
1212              
1213 3         14 my ($set_name, $set_path)
1214             = $self->_rearrange([qw(enzyme_set list_path)], @args);
1215              
1216 3         66 my $def = 'all_enzymes';
1217 3         11 my $defpath = $self->{conf} . 'enzymes/' . $def;
1218 3         16 $self->{all_enzymes} = _define_sites($defpath);
1219              
1220              
1221 3 50 33     48 if (! $set_name && ! $set_path)
    50 33        
    50          
1222             {
1223 0         0 $self->{enzyme_set} = $self->{all_enzymes};
1224 0         0 $self->{enzyme_set_name} = $def;
1225             }
1226             elsif ($set_name && ! $set_path)
1227             {
1228 0         0 $set_path = $self->{conf} . "enzymes/$set_name";
1229 0 0       0 if ( ! -e $set_path)
1230             {
1231 0         0 my $list = join q{, }, $self->list_enzyme_sets();
1232 0         0 my $message = "No enzyme set found that matches $set_name; ";
1233 0         0 $message .= "Options are ($list)\n";
1234 0         0 $self->throw($message);
1235             }
1236 0         0 my $list = _parse_enzyme_list($set_path);
1237 0         0 my $set = {};
1238 0         0 foreach my $id (@$list)
1239             {
1240 0 0       0 if (! exists $self->{all_enzymes}->{$id})
1241             {
1242 0         0 warn "$id was not recognized as an enzyme... skipping\n";
1243 0         0 next;
1244             }
1245 0         0 $set->{$id} = $self->{all_enzymes}->{$id};
1246             }
1247 0         0 $self->{enzyme_set} = $set;
1248 0         0 $self->{enzyme_set_name} = $set_name;
1249             }
1250             elsif ($set_path)
1251             {
1252 3 50       173 $self->throw("No enzyme list found at $set_path\n")
1253             unless (-e $set_path);
1254 3         72 my $list = _parse_enzyme_list($set_path);
1255 3         9 my $set = {};
1256 3         11 foreach my $id (@$list)
1257             {
1258 87 50       166 if (! exists $self->{all_enzymes}->{$id})
1259             {
1260 0         0 warn "$id was not recognized as an enzyme... skipping\n";
1261 0         0 next;
1262             }
1263 87         158 $set->{$id} = $self->{all_enzymes}->{$id};
1264             }
1265 3         8 $self->{enzyme_set} = $set;
1266 3   33     194 $self->{enzyme_set_name} = $set_name || basename($set_path);
1267             }
1268 3         20 return $self->{enzyme_set};
1269             }
1270              
1271             =head2 remove_from_enzyme_set
1272              
1273             Removes a subset of enzymes from an enzyme list. This only happens in memory, no
1274             files will be altered. The argument is an array reference of enzyme names.
1275              
1276             $GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1277             $GD->remove_from_enzyme_set(-enzymes => ['SmaI', 'MlyI']);
1278              
1279             NO TEST
1280              
1281             =cut
1282              
1283             sub remove_from_enzyme_set
1284             {
1285 0     0 1 0 my ($self, @args) = @_;
1286              
1287 0         0 my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1288              
1289 0 0       0 return unless ($enzes);
1290              
1291             $self->throw("no enzyme set has been defined")
1292 0 0       0 unless $self->{enzyme_set};
1293              
1294 0         0 my @toremove = ();
1295 0         0 my $ref = ref ($enzes);
1296 0 0       0 if ($ref eq "ARRAY")
1297             {
1298 0         0 push @toremove, @$enzes;
1299             }
1300             else
1301             {
1302 0         0 push @toremove, $enzes;
1303             }
1304 0         0 foreach my $enz (@toremove)
1305             {
1306 0         0 my $eid = $enz;
1307 0         0 my $eref = ref $enz;
1308 0 0       0 if ($eref)
1309             {
1310 0 0       0 $self->throw("object in enzymes is not a " .
1311             "Bio::GeneDesign::RestrictionEnzyme object")
1312             unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1313 0         0 $eid = $enz->id;
1314             }
1315              
1316 0 0       0 if (exists $self->{enzyme_set}->{$eid})
1317             {
1318 0         0 delete $self->{enzyme_set}->{$eid};
1319             }
1320             }
1321 0         0 return;
1322             }
1323              
1324             =head2 add_to_enzyme_set
1325              
1326             Adds a subset of enzymes to an enzyme list. This only happens in memory, no
1327             files will be altered. The argument is an array reference of RestrictionEnzyme
1328             objects.
1329              
1330             #Grab all known enzymes
1331             my $allenz = $GD->set_restriction_enzymes(-enzyme_set => 'standard_and_IIB');
1332              
1333             #Pull out a few
1334             my @keepers = ($allenz->{'BmrI'}, $allenz->{'HphI'});
1335              
1336             #Give GeneDesign the enzyme set you want
1337             $GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1338              
1339             #Add the few enzymes it didn't have before
1340             $GD->add_to_enzyme_set(-enzymes => \@keepers);
1341              
1342             NO TEST
1343              
1344             =cut
1345              
1346             sub add_to_enzyme_set
1347             {
1348 0     0 1 0 my ($self, @args) = @_;
1349              
1350 0         0 my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1351              
1352 0 0       0 return unless ($enzes);
1353              
1354             $self->throw("no enzyme set has been defined")
1355 0 0       0 unless $self->{enzyme_set};
1356              
1357 0         0 my @toadds = ();
1358 0         0 my $ref = ref ($enzes);
1359 0 0       0 if ($ref eq "ARRAY")
1360             {
1361 0         0 push @toadds, @$enzes;
1362             }
1363             else
1364             {
1365 0         0 push @toadds, $enzes;
1366             }
1367 0         0 foreach my $enz (@toadds)
1368             {
1369 0 0       0 $self->throw("object in enzymes is not a " .
1370             "Bio::GeneDesign::RestrictionEnzyme object")
1371             unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1372              
1373 0 0       0 next if (exists $self->{enzyme_set}->{$enz->id});
1374 0         0 $self->{enzyme_set}->{$enz->id} = $enz;
1375             }
1376 0         0 return;
1377             }
1378              
1379             =head2 restriction_status
1380              
1381             =cut
1382              
1383             sub restriction_status
1384             {
1385 1     1 1 39172 my ($self, @args) = @_;
1386              
1387 1         8 my ($seq) = $self->_rearrange([qw(sequence)], @args);
1388              
1389             $self->throw("no enzyme set has been defined")
1390 1 50       33 unless $self->{enzyme_set};
1391              
1392 1 50       3 $self->throw("No arguments provided for set_restriction_enzymes")
1393             unless ($seq);
1394              
1395 1         6 my $str = $self->_stripdown($seq, q{}, 0);
1396 1         3 my @reslist = values %{$self->{enzyme_set}};
  1         5  
1397 1         7 return _define_site_status($str, \@reslist);
1398             }
1399              
1400             =head2 build_prefix_tree
1401              
1402             Take an array reference of nucleotide sequences (they can be strings, Bio::Seq
1403             objects, or Bio::GeneDesign::RestrictionEnzyme objects) and create a suffix
1404             tree. If you add the peptide flag, the sequences will be ambiguously translated
1405             before they are added to the suffix tree. Otherwise they will be ambiguously
1406             transcribed. It will add the reverse complement of any non peptide sequence as
1407             long as the reverse complement is different.
1408              
1409             my $tree = $GD->build_prefix_tree(-input => ['GGATCC']);
1410              
1411             my $ptree = $GD->build_prefix_tree(
1412             -input => ['GGCCNNNNNGGCC'],
1413             -peptide => 1
1414             );
1415              
1416             =cut
1417              
1418             sub build_prefix_tree
1419             {
1420 2     2 1 4953 my ($self, @args) = @_;
1421              
1422 2         15 my ($list, $pep) = $self->_rearrange([qw(input peptide)], @args);
1423              
1424 2 50       68 $self->throw("no input provided")
1425             unless ($list);
1426              
1427 2         9 my $tree = Bio::GeneDesign::PrefixTree->new();
1428              
1429 2         6 foreach my $seq (@$list)
1430             {
1431 10         15 my $base = $seq;
1432 10         13 my $id = $seq;
1433 10         30 my $ref = ref($seq);
1434 10 50       17 if ($ref)
1435             {
1436 10 50 33     97 $self->throw("object in input is not a Bio::Seq, Bio::SeqFeature, or " .
      33        
1437             "Bio::GeneDesign::RestrictionEnzyme object")
1438             unless ($seq->isa("Bio::Seq")
1439             || $seq->isa("Bio::SeqFeatureI")
1440             || $seq->isa("Bio::GeneDesign::RestrictionEnzyme")
1441             );
1442 10 50       27 $base = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
1443 10         28 $id = $seq->id;
1444             }
1445              
1446 10 100       23 if ($pep)
1447             {
1448             $self->throw('No codon table has been defined')
1449 4 50       10 unless $self->{codontable};
1450              
1451             my @fpeptides = _amb_translation($base, $self->{codontable},
1452 4         16 't', $self->{amb_trans_memo});
1453 4         19 $tree->add_prefix($_, $id, $base) foreach (@fpeptides);
1454              
1455 4         12 my $esab = _complement($base, 1);
1456 4         7 my $lagcheck = $esab;
1457 4         14 while (substr($lagcheck, -1) eq "N")
1458             {
1459 0         0 $lagcheck = substr($lagcheck, 0, length($lagcheck) - 1);
1460             }
1461 4 100 66     23 if ($esab ne $base && $lagcheck eq $esab)
1462             {
1463             my @rpeptides = _amb_translation($esab, $self->{codontable},
1464 2         9 't', $self->{amb_trans_memo});
1465 2         10 $tree->add_prefix($_, $id, $esab) foreach (@rpeptides);
1466             }
1467             }
1468             else
1469             {
1470 6         18 my $fnucs = _amb_transcription($base);
1471 6         20 $tree->add_prefix($_, $id, undef) foreach (@$fnucs);
1472              
1473 6         15 my $esab = _complement($base, 1);
1474 6 100       21 if ($esab ne $base)
1475             {
1476 3         8 my $rnucs = _amb_transcription($esab);
1477 3         8 $tree->add_prefix($_, $id, undef) foreach (@$rnucs);
1478             }
1479             }
1480             }
1481 2         316 return $tree;
1482             }
1483              
1484             =head2 search_prefix_tree
1485              
1486             Takes a suffix tree and a sequence and searches for results, which are returned
1487             as in the Bio::GeneDesign::PrefixTree documentation.
1488              
1489             my $hits = $GD->search_prefix_tree(-tree => $ptree, -sequence => $mygeneseq);
1490              
1491             # @$hits = (['BamHI', 4, 'GGATCC', 'i hope this didn't pop up'],
1492             # ['OhnoI', 21, 'GGCCC', 'I hope these pop up'],
1493             # ['WoopsII', 21, 'GGCCC', 'I hope these pop up']
1494             #);
1495              
1496             =cut
1497              
1498             sub search_prefix_tree
1499             {
1500 2     2 1 125115 my ($self, @args) = @_;
1501              
1502 2         17 my ($tree, $seq) = $self->_rearrange([qw(tree sequence)], @args);
1503              
1504 2 50       76 $self->throw("no query sequence provided")
1505             unless ($seq);
1506              
1507 2 50       7 $self->throw("no suffix tree provided")
1508             unless ($tree);
1509              
1510 2 50       15 $self->throw("tree is not a Bio::GeneDesign::PrefixTree")
1511             unless ($tree->isa("Bio::GeneDesign::PrefixTree"));
1512              
1513 2         11 my $str = $self->_stripdown($seq, q{}, 0);
1514              
1515 2         11 my @hits = $tree->find_prefixes($str);
1516              
1517 2         36 return \@hits;
1518             }
1519              
1520             =head2 pattern_aligner
1521              
1522             =cut
1523              
1524             sub pattern_aligner
1525             {
1526 3     3 1 7640 my ($self, @args) = @_;
1527              
1528 3         17 my ($seq, $pattern, $peptide, $re)
1529             = $self->_rearrange([qw(sequence pattern peptide offset)], @args);
1530              
1531             $self->throw("no codon table has been defined")
1532 3 50       116 unless $self->{codontable};
1533              
1534 3 50       8 $self->throw("no nucleotide sequence provided")
1535             unless $seq;
1536              
1537 3   100     10 $re = $re || 0;
1538              
1539 3         10 my $str = $self->_stripdown($seq, q{}, 1);
1540              
1541 3   33     16 $peptide = $peptide || $self->translate(-sequence => $str);
1542              
1543             my ($newpattern, $offset) = _pattern_aligner($str,
1544             $pattern,
1545             $peptide,
1546             $self->{codontable},
1547             $self->{amb_trans_memo}
1548 3         12 );
1549 3 100       17 return $re ? ($newpattern, $offset) : $newpattern;
1550             }
1551              
1552             =head2 pattern_adder
1553              
1554             =cut
1555              
1556             sub pattern_adder
1557             {
1558 1     1 1 3016 my ($self, @args) = @_;
1559              
1560 1         8 my ($seq, $pattern) = $self->_rearrange([qw(sequence pattern)], @args);
1561              
1562             $self->throw("no codon table has been defined")
1563 1 50       39 unless $self->{codontable};
1564              
1565 1 50       4 $self->throw("no nucleotide sequence provided")
1566             unless $seq;
1567              
1568 1         5 my $str = $self->_stripdown($seq, q{}, 1);
1569 1         5 my $pat = $self->_stripdown($pattern, q{}, 1);
1570              
1571             my $newsequence = _pattern_adder($str,
1572             $pat,
1573             $self->{codontable},
1574             $self->{reversecodontable},
1575             $self->{amb_trans_memo}
1576 1         7 );
1577 1         5 return $newsequence;
1578             }
1579              
1580             =head2 codon_change_type
1581              
1582             =cut
1583              
1584             sub codon_change_type
1585             {
1586 0     0 1 0 my ($self, @args) = @_;
1587              
1588 0         0 my ($codold, $codnew) = $self->_rearrange([qw(from to)], @args);
1589              
1590             $self->throw("no codon table has been defined")
1591 0 0       0 unless $self->{codontable};
1592              
1593 0 0       0 $self->throw("no from sequence provided")
1594             unless $codold;
1595              
1596 0 0       0 $self->throw("no to sequence provided")
1597             unless $codnew;
1598              
1599 0         0 my $changetype = _codon_change_type($codold, $codnew, $self->{codontable});
1600 0         0 return $changetype;
1601             }
1602              
1603             =head2 translate
1604              
1605             =cut
1606              
1607             sub translate
1608             {
1609 9     9 1 6046 my ($self, @args) = @_;
1610              
1611 9         32 my ($seq, $frame) = $self->_rearrange([qw(sequence frame)], @args);
1612              
1613             $self->throw("no codon table has been defined")
1614 9 50       209 unless $self->{codontable};
1615              
1616 9 50       16 $self->throw("no nucleotide sequence provided")
1617             unless $seq;
1618              
1619 9         30 my $str = $self->_stripdown($seq, q{}, 1);
1620              
1621 9 100       16 $frame = $frame ? $frame : 1;
1622              
1623 9 50 33     40 $self->throw("frame must be -3, -2, -1, 1, 2, or 3")
1624             if (abs($frame) > 4 || abs($frame) < 0);
1625              
1626 9         30 my $peptide = _translate($str, $frame, $self->{codontable});
1627 9 100       19 if (ref $seq)
1628             {
1629 6         22 my $newobj = $seq->clone();
1630 6 50       183 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1631 6         74 $desc = "translated with " . $self->{organism} . " codon values";
1632 6         17 $newobj->seq($peptide);
1633 6         384 $newobj->alphabet("protein");
1634 6         87 $newobj->desc($desc);
1635 6         55 return $newobj;
1636             }
1637             else
1638             {
1639 3         12 return $peptide;
1640             }
1641             }
1642              
1643             =head2 reverse_translate_algorithms
1644              
1645             =cut
1646              
1647             sub reverse_translate_algorithms
1648             {
1649 0     0 1 0 return Bio::GeneDesign::ReverseTranslate::_list_algorithms();
1650             }
1651              
1652             =head2 reverse_translate
1653              
1654             =cut
1655              
1656             sub reverse_translate
1657             {
1658 20001     20001 1 2393998 my ($self, @args) = @_;
1659              
1660 20001         63003 my ($pep, $algorithm)
1661             = $self->_rearrange([qw(
1662             peptide
1663             algorithm)], @args);
1664              
1665             $self->throw("no codon table has been defined")
1666 20001 50       502277 unless $self->{codontable};
1667              
1668             $self->throw("no RSCU table has been defined")
1669 20001 50       41264 unless $self->{rscutable};
1670              
1671 20001 50       38870 $self->throw("no peptide sequence provided")
1672             unless $pep;
1673              
1674 20001         44435 my $str = $self->_stripdown($pep, q{}, 1);
1675              
1676 20001         59401 my ($newstr, @baddies) = _sanitize($str, 'pep');
1677 20001         34973 $str = $newstr;
1678 20001 50       38376 if (scalar @baddies)
1679             {
1680 0         0 print "\nGD_WARNING: removed bad characters (", join q{, }, @baddies;
1681 0         0 print ") from input sequence\n";
1682             }
1683 20001   50     36889 $algorithm = $algorithm || "balanced";
1684 20001         32326 $algorithm = lc $algorithm;
1685 20001         40350 $algorithm =~ s{\;}{}xg;
1686 20001         34534 my $name = "_reversetranslate" . "_" . $algorithm;
1687 20001         53835 my $subref = \&$name;
1688 20001   33     59609 my $seq = &$subref($self->{reversecodontable}, $self->{rscutable}, $str)
1689             || $self->throw("Can't reverse translate with $algorithm? $!");
1690              
1691 20001 50       50931 if (ref $pep)
1692             {
1693 20001         54489 my $newobj = $pep->clone();
1694 20001 50       598532 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1695 20001         269345 $desc .= "$algorithm reverse translated with " . $self->{organism};
1696 20001         32014 $desc .= " RSCU values";
1697 20001         46199 $newobj->seq($seq);
1698 20001         1556640 $newobj->desc($desc);
1699 20001         216864 my $CDS = Bio::SeqFeature::Generic->new
1700             (
1701             -primary => 'CDS',
1702             -start => 1,
1703             -end => length $seq,
1704             -tag => {
1705             label => $newobj->id . '_CDS'
1706             },
1707             );
1708 20001 50       4756265 $newobj->add_SeqFeature($CDS) || $self->throw("Cannot add CDS");
1709 20001         796497 return $newobj;
1710             }
1711             else
1712             {
1713 0         0 return $seq;
1714             }
1715             }
1716              
1717             =head2 codon_juggle_algorithms
1718              
1719             =cut
1720              
1721             sub codon_juggle_algorithms
1722             {
1723 0     0 1 0 return Bio::GeneDesign::CodonJuggle::_list_algorithms();
1724             }
1725              
1726             =head2 codon_juggle
1727              
1728             =cut
1729              
1730             sub codon_juggle
1731             {
1732 20003     20003 1 687692 my ($self, @args) = @_;
1733              
1734 20003         56434 my ($seq, $algorithm)
1735             = $self->_rearrange([qw(
1736             sequence
1737             algorithm)], @args);
1738              
1739             $self->throw("no codon table has been defined")
1740 20003 50       396189 unless $self->{codontable};
1741              
1742             $self->throw("no RSCU table has been defined")
1743 20003 50       32389 unless $self->{rscutable};
1744              
1745 20003 50       30907 $self->throw("no nucleotide sequence provided")
1746             unless $seq;
1747              
1748 20003 50       27626 $self->throw("no algorithm provided")
1749             unless $algorithm;
1750              
1751 20003         36930 my $str = $self->_stripdown($seq, q{}, 0);
1752              
1753             ##REPLACE THIS WITH CODE THAT GRACEFULLY JUGGLES JUST CDSES OR GENES
1754 20003 50       36644 $self->throw("sequence does not appear to be the right length to be a gene")
1755             unless length($str) % 3 == 0;
1756              
1757 20003         26899 $algorithm = lc $algorithm;
1758 20003         35934 $algorithm =~ s/\W//xg;
1759 20003         29904 my $name = "_codonJuggle_" . $algorithm;
1760 20003         38991 my $subref = \&$name;
1761             my $newseq = &$subref($self->{codontable},
1762             $self->{reversecodontable},
1763             $self->{rscutable},
1764 20003   33     48912 $str
1765             ) || $self->throw("Can't run $algorithm? $!");
1766 20003 50       39948 if (ref $seq)
1767             {
1768 20003         42686 my $newobj = $seq->clone();
1769 20003 50       468520 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1770 20003         221761 $desc .= "$algorithm codon juggled with " . $self->{organism};
1771 20003         26560 $desc .= " RSCU values";
1772 20003         41684 $newobj->seq($newseq);
1773 20003         1244626 $newobj->desc($desc);
1774 20003         184508 return $newobj;
1775             }
1776             else
1777             {
1778 0         0 return $newseq;
1779             }
1780             }
1781              
1782             =head2 subtract_sequence
1783              
1784             =cut
1785              
1786             sub subtract_sequence
1787             {
1788 4     4 1 3305 my ($self, @args) = @_;
1789              
1790 4         17 my ($seq, $rem) = $self->_rearrange([qw(sequence remove)], @args);
1791              
1792             $self->throw("no codon table has been defined")
1793 4 50       81 unless $self->{codontable};
1794              
1795             $self->throw("no RSCU table has been defined")
1796 4 50       11 unless $self->{rscutable};
1797              
1798 4 50       10 $self->throw("no nucleotide sequence provided")
1799             unless $seq;
1800              
1801 4 50       19 $self->throw("no sequence to be removed was defined")
1802             unless ($rem);
1803              
1804 4         15 my $str = $self->_stripdown($seq, q{}, 0);
1805              
1806 4         7 my $regarr;
1807 4         8 my $less = $rem;
1808 4 50       10 if (ref($rem))
1809             {
1810 4 100 66     28 if ($rem->isa("Bio::Seq") || $rem->isa("Bio::SeqFeatureI"))
    50          
1811             {
1812 3 50       16 $less = ref($rem->seq) ? $rem->seq->seq : $rem->seq;
1813 3         74 $regarr = _regarr($less, 1);
1814             }
1815             elsif ($rem->isa("Bio::GeneDesign::RestrictionEnzyme"))
1816             {
1817 1         5 $less = $rem->seq;
1818 1         5 $regarr = $rem->regex;
1819             }
1820             else
1821             {
1822 0         0 $self->throw("removal argument is not a Bio::Seq, Bio::SeqFeature, or "
1823             . "Bio::GeneDesign::RestrictionEnzyme");
1824             }
1825             }
1826             else
1827             {
1828 0         0 $regarr = _regarr($less, 1);
1829             }
1830              
1831             my $newseq = _subtract( uc $str,
1832             uc $less,
1833             $regarr,
1834             $self->{codontable},
1835             $self->{rscutable},
1836             $self->{reversecodontable}
1837 4         26 );
1838              
1839 4 50       14 if (ref $seq)
1840             {
1841 4         20 my $newobj = $seq->clone();
1842 4 50       161 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1843 4         64 $desc .= $rem->id . " subtracted with " . $self->{organism};
1844 4         50 $desc .= " RSCU values";
1845 4         12 $newobj->seq($newseq);
1846 4         311 $newobj->desc($desc);
1847 4         51 return $newobj;
1848             }
1849             else
1850             {
1851 0         0 return $newseq;
1852             }
1853             }
1854              
1855             =head2 repeat_smash
1856              
1857             =cut
1858              
1859             sub repeat_smash
1860             {
1861 0     0 1 0 my ($self, @args) = @_;
1862              
1863 0         0 my ($seq) = $self->_rearrange([qw(sequence)], @args);
1864              
1865             $self->throw("no codon table has been defined")
1866 0 0       0 unless $self->{codontable};
1867              
1868             $self->throw("no RSCU table has been defined")
1869 0 0       0 unless $self->{rscutable};
1870              
1871 0 0       0 $self->throw("no nucleotide sequence provided")
1872             unless $seq;
1873              
1874 0         0 my $str = $self->_stripdown($seq, q{}, 0);
1875              
1876              
1877             my $newseq = _minimize_local_alignment_dp
1878             (
1879             $str,
1880             $self->{codontable},
1881             $self->{reversecodontable},
1882             $self->{rscutable}
1883 0         0 );
1884 0 0       0 if (ref $seq)
1885             {
1886 0         0 my $newobj = $seq->clone();
1887 0 0       0 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1888 0         0 $desc .= $seq->id . " repeat smashed with " . $self->{organism};
1889 0         0 $desc .= " RSCU values";
1890 0         0 $newobj->seq($newseq);
1891 0         0 $newobj->desc($desc);
1892 0         0 return $newobj;
1893             }
1894             else
1895             {
1896 0         0 return $newseq;
1897             }
1898             }
1899              
1900             =head2 make_amplification_primers
1901              
1902             NO TEST
1903              
1904             =cut
1905              
1906             sub make_amplification_primers
1907             {
1908 0     0 1 0 my ($self, @args) = @_;
1909              
1910 0         0 my ($seq, $temp) = $self->_rearrange([qw(sequence temperature)], @args);
1911              
1912 0 0       0 $self->throw("no sequence provided") unless ($seq);
1913 0   0     0 $temp = $temp || 60;
1914              
1915 0         0 my $str = $self->_stripdown($seq, q{}, 0);
1916              
1917 0         0 return _make_amplification_primers($str, $temp);
1918             }
1919              
1920             =head2 contains_homopolymer
1921              
1922             Returns 1 if the sequence contains a homopolymer of the provided length (default
1923             is 5bp) and 0 else
1924              
1925             =cut
1926              
1927             sub contains_homopolymer
1928             {
1929 0     0 1 0 my ($self, @args) = @_;
1930              
1931 0         0 my ($seq, $length) = $self->_rearrange([qw(sequence length)], @args);
1932              
1933 0 0       0 $self->throw("no sequence provided") unless ($seq);
1934 0   0     0 $length = $length || 5;
1935              
1936 0         0 my $str = $self->_stripdown($seq, q{}, 0);
1937              
1938 0         0 return _check_for_homopolymer($str, $length);
1939             }
1940              
1941             =head2 filter_homopolymers
1942              
1943             =cut
1944              
1945             sub filter_homopolymers
1946             {
1947 0     0 1 0 my ($self, @args) = @_;
1948              
1949 0         0 my ($seqs, $length) = $self->_rearrange([qw(sequences length)], @args);
1950              
1951 0 0       0 $self->throw("no argument provided to filter_homopolymers")
1952             unless $seqs;
1953 0   0     0 $length = $length || 5;
1954              
1955 0         0 my $arrref = $self->_stripdown($seqs, 'ARRAY', 1);
1956              
1957 0         0 my $seqarr = _filter_homopolymer( $arrref, $length);
1958 0         0 return $seqarr;
1959             }
1960              
1961             =head2 find_runs
1962              
1963             =cut
1964              
1965             sub find_runs
1966             {
1967 0     0 1 0 my ($self, @args) = @_;
1968              
1969 0         0 my ($seq, $pattern, $min) = $self->_rearrange([qw(
1970             sequence pattern minimum)], @args);
1971            
1972 0 0       0 if (! $pattern)
1973             {
1974 0         0 $self->throw("no pattern argument provided to find_runs");
1975             }
1976 0   0     0 $min = $min || 5;
1977            
1978 0         0 return _find_runs($seq, $pattern, $min);
1979             }
1980              
1981             =head2 make_graph
1982              
1983             =cut
1984              
1985             sub make_graph
1986             {
1987 0     0 1 0 my ($self, @args) = @_;
1988              
1989             $self->throw("Graphing is not available")
1990 0 0       0 unless $self->{graph};
1991              
1992 0         0 my ($seqobjs, $window)
1993             = $self->_rearrange([qw(sequences window)], @args);
1994              
1995             $self->throw("no codon table has been defined")
1996 0 0       0 unless $self->{codontable};
1997              
1998             $self->throw("no RSCU table has been defined")
1999 0 0       0 unless $self->{rscutable};
2000              
2001 0 0       0 $self->throw("no nucleotide sequences provided")
2002             unless $seqobjs;
2003              
2004 0 0       0 $self->throw("sequences argument is not an array reference")
2005             unless ref($seqobjs) eq "ARRAY";
2006              
2007 0         0 foreach my $seqobj (@$seqobjs)
2008             {
2009 0 0       0 $self->throw(ref($seqobj) . " is not a Bio::Seq object")
2010             unless $seqobj->isa("Bio::Seq");
2011              
2012 0 0       0 $self->throw("$seqobj is not a nucleotide sequence")
2013             unless $seqobj->alphabet eq "dna";
2014             }
2015              
2016             my ($graph, $format) = _make_graph( $seqobjs,
2017             $window,
2018             $self->{organism},
2019             $self->{codontable},
2020             $self->{rscutable},
2021 0         0 $self->{reversecodontable});
2022 0         0 return ($graph, $format);
2023             }
2024              
2025             =head2 make_dotplot
2026              
2027             =cut
2028              
2029             sub make_dotplot
2030             {
2031 0     0 1 0 my ($self, @args) = @_;
2032              
2033             $self->throw("Graphing is not available")
2034 0 0       0 unless $self->{graph};
2035              
2036 0         0 my ($seq1, $seq2, $window, $stringency)
2037             = $self->_rearrange([qw(first second window stringency)], @args);
2038              
2039 0   0     0 $window = $window || 10;
2040 0   0     0 $stringency = $stringency || 10;
2041              
2042 0 0 0     0 $self->throw("no nucleotide sequences provided")
2043             unless ($seq1 && $seq2);
2044              
2045 0         0 foreach my $seqobj ($seq1, $seq2)
2046             {
2047 0 0       0 $self->throw(ref($seqobj) . " is not a Bio::Seq object")
2048             unless $seqobj->isa("Bio::Seq");
2049              
2050 0 0       0 $self->throw("$seqobj is not a nucleotide sequence")
2051             unless $seqobj->alphabet eq "dna";
2052             }
2053              
2054 0         0 my $graph = _dotplot(
2055             $seq1->seq,
2056             $seq2->seq,
2057             $window,
2058             $stringency
2059             );
2060 0         0 return $graph;
2061             }
2062              
2063             =head2 import_seqs
2064              
2065             NO TEST
2066              
2067             =cut
2068              
2069             sub import_seqs
2070             {
2071 1     1 1 376 my ($self, $path) = @_;
2072 1 50       21 $self->throw("$path does not exist.") if (! -e $path);
2073 1         7 my ($iterator, $filename, $suffix) = _import_sequences($path);
2074 1         4 return ($iterator, $filename, $suffix);
2075             }
2076             =head2 import_seq_from_string
2077              
2078             NO TEST
2079              
2080             =cut
2081              
2082             sub import_seq_from_string
2083             {
2084 0     0 0 0 my ($self, $string) = @_;
2085 0         0 my ($iterator, $filename, $suffix) = _import_sequences_from_string($string);
2086 0         0 return ($iterator, $filename, $suffix);
2087             }
2088              
2089             =head2 export_formats
2090              
2091             Export formats that have been tried and tested to work well.
2092              
2093             =cut
2094              
2095             sub export_formats
2096             {
2097 0     0 1 0 return Bio::GeneDesign::IO::_export_formats();
2098             }
2099              
2100             =head2 export_seqs
2101              
2102             NO TEST
2103              
2104             =cut
2105              
2106             sub export_seqs
2107             {
2108 0     0 1 0 my ($self, @args) = @_;
2109              
2110 0         0 my ($outpath, $outformat, $seqarr)
2111             = $self->_rearrange([qw(
2112             filepath
2113             format
2114             sequences)], @args);
2115              
2116 0 0       0 $outformat = $outformat ? $outformat : 'genbank';
2117 0 0       0 $self->throw("$outformat is not a format recognized by BioPerl")
2118             if (! _isa_BP_format($outformat));
2119              
2120             #Long attributes that come in from a genbank file will have corruption
2121             #remove spaces and reattribute to fix bbs in genbank file ):
2122 0 0       0 _long_att_fix($seqarr) if ($outformat eq 'genbank');
2123            
2124 0         0 return _export_sequences($outpath, $outformat, $seqarr);
2125             }
2126              
2127             =head2 random_dna
2128              
2129             =cut
2130              
2131             sub random_dna
2132             {
2133 30000     30000 1 149009 my ($self, @args) = @_;
2134              
2135 30000         61811 my ($rlen, $rgc, $rstop)
2136             = $self->_rearrange([qw(
2137             length
2138             gc_percentage
2139             no_stops)], @args);
2140              
2141             $self->throw("no codon table has been defined")
2142 30000 50 33     503282 if ($rstop && ! $self->{codontable});
2143              
2144 30000   100     49969 $rgc = $rgc || 50;
2145 30000 50 33     87887 $self->throw("gc_percentage must be between 0 and 100")
      33        
2146             if ($rgc && ($rgc < 0 || $rgc > 100));
2147              
2148 30000 50 33     75979 if (! $rlen || $rlen < 1)
    50          
2149             {
2150 0         0 return q{};
2151             }
2152             elsif ($rlen == 1)
2153             {
2154 30000 50       56506 return $rgc ? _randombase_weighted($rgc) : _randombase;
2155             }
2156 0         0 return _randomDNA($rlen, $rgc, $rstop, $self->{codontable});
2157             }
2158              
2159             =head2 replace_ambiguous_bases
2160              
2161             =cut
2162              
2163             sub replace_ambiguous_bases
2164             {
2165 3     3 1 18 my ($self, $seq) = @_;
2166              
2167 3 50       7 $self->throw("no sequence provided ")
2168             unless ($seq);
2169              
2170 3         13 my $str = $self->_stripdown($seq, q{}, 1);
2171              
2172 3         11 my $newstr = _replace_ambiguous_bases($str);
2173              
2174 3 50       8 if (ref $seq)
2175             {
2176 0         0 my $newobj = $seq->clone();
2177 0 0       0 my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
2178 0         0 $desc .= "deambiguated";
2179 0         0 $newobj->seq($newstr);
2180 0         0 $newobj->desc($desc);
2181 0         0 return $newobj;
2182             }
2183             else
2184             {
2185 3         7 return $newstr;
2186             }
2187             }
2188              
2189             =head1 PLEASANTRIES
2190              
2191             =head2 pad
2192              
2193             my $name = 5;
2194             my $nice = $GD->pad($name, 3);
2195             $nice == "005" || die;
2196              
2197             $name = "oligo";
2198             $nice = $GD->pad($name, 7, "_");
2199             $nice == "__oligo" || die;
2200              
2201             Pads an integer with leading zeroes (by default) or any provided set of
2202             characters. This is useful both to make reports pretty and to standardize the
2203             length of designations.
2204              
2205             =cut
2206              
2207             sub pad
2208             {
2209 0     0 1 0 my ($self, $num, $thickness, $chars) = @_;
2210 0         0 my $t = $num;
2211 0   0     0 $chars = $chars || "0";
2212 0         0 $t = $chars . $t while (length($t) < $thickness);
2213 0         0 return $t;
2214             }
2215              
2216             =head2 attitude
2217              
2218             my $adverb = $GD->attitude();
2219              
2220             Ask GeneDesign how it handled your request.
2221              
2222             =cut
2223              
2224             sub attitude
2225             {
2226 0     0 1 0 my @adverbs = qw(Elegantly Energetically Enthusiastically Excitedly Daintily
2227             Deliberately Diligently Dreamily Courageously Cooly Cleverly Cheerfully
2228             Carefully Calmly Briskly Blindly Bashfully Absentmindedly Awkwardly
2229             Faithfully Ferociously Fervently Fiercely Fondly Gently Gleefully Gratefully
2230             Gracefully Happily Helpfully Heroically Honestly Joyfully Jubilantly
2231             Jovially Keenly Kindly Knowingly Kookily Loftily Lovingly Loyally
2232             Majestically Mechanically Merrily Mostly Neatly Nicely Obediently Officially
2233             Optimistically Patiently Perfectly Playfully Positively Powerfully
2234             Punctually Properly Promptly Quaintly Quickly Quirkily Rapidly Readily
2235             Reassuringly Righteously Sedately Seriously Sharply Shyly Silently Smoothly
2236             Solemnly Speedily Strictly Successfully Suddenly Sweetly Swiftly Tenderly
2237             Thankfully Throroughly Thoughtfully Triumphantly Ultimately Unabashedly
2238             Utterly Upliftingly Urgently Usefully Valiantly Victoriously Vivaciously
2239             Warmly Wholly Wisely Wonderfully Yawningly Zealously Zestfully
2240             );
2241 0         0 my $index = _random_index(scalar(@adverbs));
2242 0         0 return $adverbs[$index];
2243             }
2244              
2245             =head2 endslash
2246              
2247             =cut
2248              
2249             sub endslash
2250             {
2251 0     0 1 0 my ($self, $path) = @_;
2252 0 0       0 if ((substr $path, -1, 1) ne q{/})
2253             {
2254 0         0 $path .= q{/};
2255             }
2256 0         0 return $path;
2257             }
2258              
2259             =head2 _stripdown
2260              
2261             =cut
2262              
2263             sub _stripdown
2264             {
2265 40063     40063   65804 my ($self, $seqarg, $type, $enz_allowed) = @_;
2266              
2267 40063   100     88566 $enz_allowed = $enz_allowed || 0;
2268 40063 100       90632 my @seqs = ref $seqarg eq 'ARRAY' ? @$seqarg : ($seqarg);
2269 40063         50476 my @list;
2270 40063         62089 foreach my $seq (@seqs)
2271             {
2272 40063         52913 my $str = $seq;
2273 40063         55333 my $ref = ref $seq;
2274 40063 100       65914 if ($ref)
2275             {
2276 40028         73365 my $bit = $self->_checkref($seq, $enz_allowed);
2277 40028 50       78889 $self->throw("object $ref is not a compatible object $bit") if ($bit < 1);
2278 40028 50       80507 $str = ref $seq->seq ? $seq->seq->seq : $seq->seq;
2279             }
2280 40063         833833 push @list, uc $str;
2281             }
2282 40063 100       76669 return \@list if ($type eq 'ARRAY');
2283 40060         78580 return $list[0];
2284             }
2285              
2286             =head2 _checkref
2287              
2288             =cut
2289              
2290             sub _checkref
2291             {
2292 40028     40028   60430 my ($self, $pobj, $enz_allowed) = @_;
2293 40028         51913 my $ref = ref $pobj;
2294 40028 50       67021 return -1 if (! $ref);
2295 40028   100     72644 $enz_allowed = $enz_allowed || 0;
2296 40028         55675 my ($bioseq, $bioseqfeat) = (0, 0);
2297 40028         84711 $bioseq = $pobj->isa("Bio::Seq");
2298 40028         103258 $bioseqfeat = $pobj->isa("Bio::SeqFeatureI");
2299 40028 100       67272 if ($enz_allowed)
2300             {
2301 20016         47310 $enz_allowed = $pobj->isa("Bio::GeneDesign::RestrictionEnzyme");
2302             }
2303 40028         70439 return $bioseq + $bioseqfeat + $enz_allowed;
2304             }
2305              
2306             1;
2307              
2308             __END__