File Coverage

blib/lib/Bio/GeneDesign.pm

Criterion	Covered	Total	%
statement	391	629	62.1
branch	117	318	36.7
condition	33	98	33.6
subroutine	53	84	63.1
pod	64	65	98.4
total	658	1194	55.1

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# GeneDesign engine
3							#
4
5							=head1 NAME
6
7							Bio::GeneDesign
8
9							=head1 VERSION
10
11							Version 5.56
12
13							=head1 DESCRIPTION
14
15							=head1 AUTHOR
16
17							Sarah Richardson
18
19							=cut
20
21							package Bio::GeneDesign;
22	11			11		973986	use base qw(Bio::Root::Root);
	11					107
	11					5357
23
24	11			11		425477	use Bio::GeneDesign::ConfigData;
	11					24
	11					398
25	11			11		4331	use Bio::GeneDesign::Basic qw(:GD);
	11					30
	11					2317
26	11			11		4283	use Bio::GeneDesign::IO qw(:GD);
	11					27
	11					1652
27	11			11		4590	use Bio::GeneDesign::CodonJuggle qw(:GD);
	11					28
	11					1465
28	11			11		76	use Bio::GeneDesign::Codons qw(:GD);
	11					21
	11					1480
29	11			11		5574	use Bio::GeneDesign::Oligo qw(:GD);
	11					28
	11					1162
30	11			11		72	use Bio::GeneDesign::Random qw(:GD);
	11					16
	11					997
31	11			11		4358	use Bio::GeneDesign::RestrictionEnzymes qw(:GD);
	11					27
	11					1205
32	11			11		4323	use Bio::GeneDesign::ReverseTranslate qw(:GD);
	11					23
	11					1058
33	11			11		4150	use Bio::GeneDesign::PrefixTree;
	11					27
	11					300
34	11			11		5826	use Bio::SeqFeature::Generic;
	11					539526
	11					361
35	11			11		97	use File::Basename;
	11					26
	11					694
36	11			11		67	use Bio::Seq;
	11					22
	11					164
37	11			11		50	use Carp;
	11					20
	11					463
38
39	11			11		58	use strict;
	11					20
	11					190
40	11			11		52	use warnings;
	11					21
	11					74640
41
42							my $VERSION = 5.56;
43
44							=head1 CONSTRUCTORS
45
46							=head2 new
47
48							Returns an initialized Bio::GeneDesign object.
49
50							This function reads the ConfigData written at installation, imports the
51							relevant sublibraries, and sets the relevant paths.
52
53							my $GD = Bio::GeneDesign->new();
54
55							=cut
56
57							sub new
58							{
59	9			9	1	995	my ($class, @args) = @_;
60	9					105	my $self = $class->SUPER::new(@args);
61	9					112	bless $self, $class;
62
63	9					69	$self->{script_path} = Bio::GeneDesign::ConfigData->config('script_path');
64	9					36	$self->{conf} = Bio::GeneDesign::ConfigData->config('conf_path');
65	9	50				52	$self->{conf} .= q{/} unless substr($self->{conf}, -1, 1) eq q{/};
66
67	9					33	$self->{tmp_path} = Bio::GeneDesign::ConfigData->config('tmp_path');
68	9	50				39	$self->{tmp_path} .= q{/} unless substr($self->{tmp_path}, -1, 1) eq q{/};
69
70	9					29	$self->{graph} = Bio::GeneDesign::ConfigData->config('graphing_support');
71	9	50				35	if ($self->{graph} > 0)
72							{
73	0					0	require Bio::GeneDesign::Graph;
74	0					0	import Bio::GeneDesign::Graph qw(:GD);
75							}
76
77	9					28	$self->{EMBOSS} = Bio::GeneDesign::ConfigData->config('EMBOSS_support');
78	9	50				31	if ($self->{EMBOSS})
79							{
80	0					0	require Bio::GeneDesign::Palindrome;
81	0					0	import Bio::GeneDesign::Palindrome qw(:GD);
82							}
83
84	9					52	$self->{BLAST} = Bio::GeneDesign::ConfigData->config('BLAST_support');
85	9	50				34	if ($self->BLAST)
86							{
87							#$ENV{BLASTPLUSDIR} = Bio::GeneDesign::ConfigData->config('blast_path');
88	0					0	require Bio::GeneDesign::Blast;
89	0					0	import Bio::GeneDesign::Blast qw(:GD);
90							}
91
92	9					27	$self->{vmatch} = Bio::GeneDesign::ConfigData->config('vmatch_support');
93	9	50				41	if ($self->{vmatch})
94							{
95	0					0	require Bio::GeneDesign::Vmatch;
96	0					0	import Bio::GeneDesign::Vmatch qw(:GD);
97							}
98
99	9					41	$self->{codon_path} = $self->{conf} . 'codon_tables/';
100	9					23	$self->{organism} = undef;
101	9					18	$self->{codontable} = undef;
102	9					19	$self->{enzyme_set} = undef;
103	9					18	$self->{version} = $VERSION;
104	9					22	$self->{amb_trans_memo} = {};
105
106	9					25	return $self;
107							}
108
109							=head1 ACCESSORS
110
111							=cut
112
113							=head2 codon_path
114
115							returns the directory containing codon tables
116
117							=cut
118
119							sub codon_path
120							{
121	0			0	1	0	my ($self) = @_;
122	0					0	return $self->{codon_path};
123							}
124
125							=head2 EMBOSS
126
127							returns a value if EMBOSS_support was vetted and approved during installation.
128
129							=cut
130
131							sub EMBOSS
132							{
133	0			0	1	0	my ($self) = @_;
134	0					0	return $self->{'EMBOSS'};
135							}
136
137							=head2 BLAST
138
139							returns a value if BLAST_support was vetted and approved during installation.
140
141							=cut
142
143							sub BLAST
144							{
145	9			9	1	22	my ($self) = @_;
146	9					29	return $self->{'BLAST'};
147							}
148
149							=head2 graph
150
151							returns a value if graphing_support was vetted and approved during installation.
152
153							=cut
154
155							sub graph
156							{
157	0			0	1	0	my ($self) = @_;
158	0					0	return $self->{'graph'};
159							}
160
161							=head2 vmatch
162
163							returns a value if vmatch_support was vetted and approved during installation.
164
165							=cut
166
167							sub vmatch
168							{
169	0			0	1	0	my ($self) = @_;
170	0					0	return $self->{'vmatch'};
171							}
172
173							=head2 enzyme_set
174
175							Returns a hash reference where the keys are enzyme names and the values are
176							L objects, if the enzyme
177							set has been defined.
178
179							To set this value, use L.
180
181							=cut
182
183							sub enzyme_set
184							{
185	4			4	1	853	my ($self) = @_;
186	4					17	return $self->{'enzyme_set'};
187							}
188
189							=head2 enzyme_set_name
190
191							Returns the name of the enzyme set in use, if there is one.
192
193							To set this value, use L.
194
195							=cut
196
197							sub enzyme_set_name
198							{
199	0			0	1	0	my ($self) = @_;
200	0					0	return $self->{'enzyme_set_name'};
201							}
202
203							=head2 all_enzymes
204
205							Returns a hash reference where the keys are enzyme names and the values are
206							L objects
207
208							To set this value, use L.
209
210							=cut
211
212							sub all_enzymes
213							{
214	0			0	1	0	my ($self) = @_;
215	0					0	return $self->{'all_enzymes'};
216							}
217
218							=head2 organism
219
220							Returns the name of the organism in use, if there is one.
221
222							To set this value, use L.
223
224							=cut
225
226							sub organism
227							{
228	0			0	1	0	my ($self) = @_;
229	0					0	return $self->{'organism'};
230							}
231
232							=head2 codontable
233
234							Returns the codon table in use, if there is one.
235
236							The codon table is a hash reference where the keys are upper case nucleotides
237							and the values are upper case single letter amino acids.
238
239							my $codon_t = $GD->codontable();
240							$codon_t->{"ATG"} eq "M" \|\| die;
241
242							To set this value, use L.
243
244							=cut
245
246							sub codontable
247							{
248	1			1	1	571	my ($self) = @_;
249	1					6	return $self->{'codontable'};
250							}
251
252							=head2 reversecodontable
253
254							Returns the reverse codon table in use, if there is one.
255
256							The reverse codon table is a hash reference where the keys are upper case single
257							letter amino acids and the values are upper case nucleotides.
258
259							my $revcodon_t = $GD->reversecodontable();
260							$revcodon_t->{"M"} eq "ATG" \|\| die;
261
262							This value is set automatically when L is run.
263
264							=cut
265
266							sub reversecodontable
267							{
268	1			1	1	1209	my ($self) = @_;
269	1					3	return $self->{'reversecodontable'};
270							}
271
272							=head2 rscutable
273
274							Returns the RSCU table in use, if there is one.
275
276							The RSCU codon table is a hash reference where the keys are upper case
277							nucleotides and the values are floats.
278
279							my $rscu_t = $GD->rscutable();
280							$rscu_t->{"ATG"} eq 1.00 \|\| die;
281
282							To set this value, use L.
283
284							=cut
285
286							sub rscutable
287							{
288	1			1	1	3536	my ($self) = @_;
289	1					5	return $self->{'rscutable'};
290							}
291
292
293							=head1 FUNCTIONS
294
295							=cut
296
297							=head2 melt
298
299							my $Tm = $GD->melt(-sequence => $myseq);
300
301							The -sequence argument is required.
302
303							Returns the melting temperature of a DNA sequence.
304
305							You can set the salt and DNA concentrations with the -salt and -concentration
306							arguments; they are 50mm (.05) and 100 pm (.0000001) respectively.
307
308							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
309							object to be analyzed with the -sequence flag.
310
311							There are four different formulae to choose from. If you wish to use the nearest
312							neighbor method, use the -nearest_neighbor flag. Otherwise the appropriate
313							formula will be determined by the length of your -sequence argument.
314
315							For sequences under 14 base pairs:
316							Tm = (4 * #GC) + (2 * #AT).
317
318							For sequences between 14 and 50 base pairs:
319							Tm = 100.5 + (41 * #GC / length) - (820 / length) + 16.6 * log10(salt)
320
321							For sequences over 50 base pairs:
322							Tm = 81.5 + (41 * #GC / length) - (500 / length) + 16.6 * log10(salt) - .62;
323
324							=cut
325
326							sub melt
327							{
328	4			4	1	5473	my ($self, @args) = @_;
329
330	4					18	my ($seq, $salt, $conc, $nnflag)
331							= $self->_rearrange([qw(
332							sequence
333							salt
334							concentration
335							nearest_neighbor)], @args);
336
337	4	50				84	$self->throw("No sequence provided for the melt function")
338							unless ($seq);
339
340	4	100				10	$nnflag = $nnflag ? 1 : 0;
341	4		50			15	$salt = $salt \|\| .05;
342	4		50			10	$conc = $conc \|\| .0000001;
343	4					9	my $str = $self->_stripdown($seq, q{}, 1);
344
345	4	100				9	if ($nnflag)
346							{
347	1					4	return _ntherm($str, $salt, $conc);
348							}
349	3					11	return _melt($str, $salt, $conc);
350							}
351
352							=head2 complement
353
354							$my_seq = "AATTCG";
355
356							my $complemented_seq = $GD->complement($my_seq);
357							$complemented_seq eq "TTAAGC" \|\| die;
358
359							my $reverse_complemented_seq = $GD->complement($my_seq, 1);
360							$reverse_complemented_seq eq "CGAATT" \|\| die;
361
362							#clean
363							my $complemented_seq = $GD->complement(-sequence => $my_seq);
364							$complemented_seq eq "TTAAGC" \|\| die;
365
366							my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq,
367							-reverse => 1);
368							$reverse_complemented_seq eq "CGAATT" \|\| die;
369
370
371							The -sequence argument is required.
372
373							Complements or reverse complements a DNA sequence.
374
375							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
376							object to be processed.
377
378							If you also pass along a true statement, the sequence will be reversed and
379							complemented.
380
381							=cut
382
383							sub complement
384							{
385	4			4	1	4830	my ($self, @args) = @_;
386
387	4					16	my ($seq, $swit)
388							= $self->_rearrange([qw(
389							sequence
390							reverse)], @args);
391
392	4	50				31	$self->throw("No sequence provided for the complement function")
393							unless $seq;
394
395	4		100			13	$swit = $swit \|\| 0;
396
397	4					7	my $str = $self->_stripdown($seq, q{}, 1);
398
399	4					12	return _complement($str, $swit);
400							}
401
402							=head2 rcomplement
403
404							Sugar time!
405
406							$my_seq = "AATTCG";
407
408							my $reverse_complemented_seq = $GD->rcomplement($my_seq);
409							$reverse_complemented_seq eq "CGAATT" \|\| die;
410
411							#clean
412
413							my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq,
414							-reverse => 1);
415							$reverse_complemented_seq eq "CGAATT" \|\| die;
416
417
418							The -sequence argument is required.
419
420							Reverse complements a DNA sequence.
421
422							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
423							object to be processed.
424
425							=cut
426
427							sub rcomplement
428							{
429	3			3	1	4219	my ($self, @args) = @_;
430
431	3					11	my ($seq) = $self->_rearrange([qw(sequence)], @args);
432
433	3	50				22	$self->throw('No sequence provided for the rcomplement function')
434							unless $seq;
435
436	3					8	my $str = $self->_stripdown($seq, q{}, 1);
437
438	3					9	return _complement($str, 1);
439							}
440
441							=head2 transcribe
442
443							$my_seq = "AATTCG";
444
445							my $RNA_seq = $GD->transcribe($my_seq);
446							$complemented_seq eq "AAUUCG" \|\| die;
447
448							The -sequence argument is required.
449
450							Transcribes an RNA sequence from a DNA sequence.
451
452							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
453							object to be processed.
454
455							=cut
456
457							sub transcribe
458							{
459	0			0	1	0	my ($self, @args) = @_;
460
461	0					0	my ($seq) = $self->_rearrange([qw(sequence)], @args);
462
463	0	0				0	$self->throw("No sequence provided for the transcribe function")
464							unless $seq;
465
466	0					0	my $str = $self->_stripdown($seq, q{}, 1);
467
468	0					0	return _toRNA($str);
469							}
470
471							=head2 count
472
473							$my_seq = "AATTCG";
474							my $count = $GD->count($my_seq);
475							$count->{C} == 1 \|\| die;
476							$count->{G} == 1 \|\| die;
477							$count->{A} == 2 \|\| die;
478							$count->{GCp} == 33.3 \|\| die;
479							$count->{ATp} == 66.7 \|\| die;
480
481							#clean
482							my $count = $GD->count(-sequence => $my_seq);
483
484							You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
485							object.
486
487							the count function counts the bases in a DNA sequence and returns a hash
488							reference where each base (including the ambiguous bases) are keys and the
489							values are the number of times they appear in the sequence. There are also the
490							special values GCp and ATp for GC and AT percentage.
491
492							=cut
493
494							sub count
495							{
496	2			2	1	3776	my ($self, @args) = @_;
497
498	2					9	my ($seq) = $self->_rearrange([qw(sequence)], @args);
499
500	2	50				18	$self->throw("No sequence provided for the count function")
501							unless ($seq);
502
503
504	2					6	my $str = $self->_stripdown($seq, q{}, 1);
505
506	2					9	return _count($str);
507							}
508
509
510							=head2 GC_windows
511
512							takes a nucleotide sequence, a window size, and minimum and maximum values.
513							returns lists of real coordinates of subsequences that violate mimimum or
514							maximum GC percentages.
515
516							Values are returned inside an array reference such that the first value is an
517							array ref of minimum violators (as array refs of left/right coordinates), and
518							the second value is an array ref of maximum violators.
519
520							$return_value = [
521							[[left, right], [left, right]], #minimum violators
522							[[left, right], [left, right]] #maximum violators
523							];
524
525							=cut
526
527							sub GC_windows
528							{
529	0			0	1	0	my ($self, @args) = @_;
530
531	0					0	my ($seq, $win, $min, $max) = $self->_rearrange([qw(
532							sequence window minimum maximum)], @args);
533
534	0	0				0	$self->throw("No sequence provided for the GC_windows function")
535							unless ($seq);
536
537	0					0	my $str = $self->_stripdown($seq, q{}, 1);
538
539	0					0	return _gcwindows($str, $win, $min, $max);
540							}
541
542							=head2 regex_nt
543
544							my $my_seq = "ABC";
545							my $regex = $GD->regex_nt(-sequence => $my_seq);
546							# $regex is qr/A[CGT]C/;
547
548							my $regarr = $GD->regex_nt(-sequence => $my_seq --reverse_complement => 1);
549							# $regarr is [qr/A[CGT]C/, qr/G[ACG]T/]
550
551
552							You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
553							object to be processed with the -sequence flag.
554
555							regex_nt creates a compiled regular expression or a set of them that can be used
556							to query large nucleotide sequences for possibly ambiguous subsequences.
557
558
559							If you want to get regular expressions for both the forward and reverse senses
560							of the DNA, use the -reverse_complement flag and expect a reference to an array
561							of compiled regexes.
562
563							=cut
564
565							sub regex_nt
566							{
567	6			6	1	12702	my ($self, @args) = @_;
568
569	6					29	my ($seq, $arrswit)
570							= $self->_rearrange([qw(
571							sequence
572							reverse_complement)], @args
573							);
574
575	6	50				97	$self->throw("no sequence provided the regex_nt function")
576							unless ($seq);
577
578	6					16	my $str = $self->_stripdown($seq, q{}, 1);
579
580	6	100				14	if ($arrswit)
581							{
582	2					6	return _regarr($str);
583							}
584							else
585							{
586	4					14	return _regres($str, 1);
587							}
588							}
589
590							=head2 regex_aa
591
592							my $my_pep = "AEQ*";
593							my $regex = $GD->regex_aa(-sequence => $my_pep);
594							$regex == qr/AEQ[\*]/ \|\| die;
595
596							Creates a compiled regular expression or a set of them that can be used to query
597							large amino acid sequences for smaller subsequences.
598
599							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
600							object to be processed with the -sequence flag.
601
602							=cut
603
604							sub regex_aa
605							{
606	1			1	1	632	my ($self, $seq) = @_;
607
608	1	50				4	$self->throw("no sequence provided for the regex_aa function")
609							unless ($seq);
610
611	1					4	my $str = $self->_stripdown($seq, q{}, 1);
612
613	1					5	return _regres($str, 2);
614							}
615
616							=head2 sequence_is_ambiguous
617
618							my $my_seq = "ABC";
619							my $flag = $GD->sequence_is_ambiguous($my_seq);
620							$flag == 1 \|\| die;
621
622							$my_seq = "ATC";
623							$flag = $GD->sequence_is_ambiguous($my_seq);
624							$flag == 0 \|\| die;
625
626							Checks to see if a DNA sequence contains ambiguous bases (RYMKWSBDHVN) and
627							returns true if it does.
628
629							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
630							object to be processed.
631
632							=cut
633
634							sub sequence_is_ambiguous
635							{
636	5			5	1	2547	my ($self, $seq) = @_;
637
638	5	50				13	$self->throw("no sequence provided for the sequence_is_ambiguous function")
639							unless ($seq);
640
641	5					13	my $str = $self->_stripdown($seq, q{}, 1);
642
643	5					17	return _is_ambiguous($str);
644							}
645
646							=head2 ambiguous_translation
647
648							my $my_seq = "ABC";
649							my @peps = $GD->ambiguous_translation(-sequence => $my_seq, -frame => 1);
650							# @peps is qw(I T C)
651
652							You must pass a string variable, a Bio::Seq object, or a Bio::SeqFeatureI object
653							to be processed.
654
655							Translates a nucleotide sequence that may have ambiguous bases and returns an
656							array of possible peptides.
657
658							The frame argument may be 1, 2, 3, -1, -2, or -3.
659							It may also be t (three, 1, 2, 3), or s (six, 1, 2, 3, -1, -2, -3).
660							It defaults to 1.
661
662							=cut
663
664							sub ambiguous_translation
665							{
666	4			4	1	10776	my ($self, @args) = @_;
667
668	4					18	my ($seq, $frame)
669							= $self->_rearrange([qw(sequence frame)], @args);
670
671	4	50				124	$self->throw("no sequence provided for the ambiguous_translation function")
672							unless ($seq);
673
674	4		50			11	$frame = $frame \|\| 1;
675
676	4					8	my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t);
	32					60
677
678							$self->throw("Bad frame argument to ambiguous_translation")
679	4	50				14	unless (exists $ambtransswits{$frame});
680
681	4					14	my $str = $self->_stripdown($seq, q{}, 1);
682
683							return _amb_translation($str,
684							$self->{codontable},
685							$frame,
686	4					19	$self->{amb_trans_memo});
687							}
688
689							=head2 ambiguous_transcription
690
691							my $my_seq = "ABC";
692							my $seqs = $GD->ambiguous_transcription($my_seq);
693							# $seqs is [qw(ACC AGC ATC)]
694
695							Deambiguates a nucleotide sequence that may have ambiguous bases and returns a
696							reference to a sorted array of possible unambiguous sequences.
697
698							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
699							object to be processed.
700
701							=cut
702
703							sub ambiguous_transcription
704							{
705	1			1	1	2919	my ($self, $seq) = @_;
706
707	1	50				4	$self->throw("no sequence provided for the ambiguous_transcription function")
708							unless ($seq);
709
710	1					4	my $str = $self->_stripdown($seq, q{}, 1);
711
712	1					5	return _amb_transcription($str);
713							}
714
715							=head2 positions
716
717							my $seq = "TGCTGACTGCAGTCAGTACACTACGTACGTGCATGAC";
718							my $seek = "CWC";
719
720							my $positions = $GD->positions(-sequence => $seq,
721							-query => $seek);
722							# $positions is {18 => "CAC"}
723
724							$positions = $GD->positions(-sequence => $seq,
725							-query => $seek,
726							-reverse_complement => 1);
727							# $positions is {18 => "CAC", 28 => "GTG"}
728
729							Finds and returns all the positions and sequences of a potentially ambiguous
730							subsequence in a larger sequence. The reverse_complement flag is off by default.
731
732							You can pass either string variables, Bio::Seq objects, or Bio::SeqFeatureI
733							objects as the sequence and query arguments; additionally you may pass a
734							L object as the query
735							argument.
736
737							=cut
738
739							sub positions
740							{
741	2			2	1	3686	my ($self, @args) = @_;
742
743	2					9	my ($seq, $seek, $revcom)
744							= $self->_rearrange([qw(
745							sequence
746							query
747							reverse_complement)], @args);
748
749	2	50				48	$self->throw("no sequence provided for the positions function")
750							unless ($seq);
751
752	2	50				5	$self->throw("no query provided for the positions function")
753							unless ($seek);
754
755
756	2					4	my $base = $self->_stripdown($seq, q{}, 1);
757
758	2					6	my $regarr = [];
759	2					3	my $query = $seek;
760	2					3	my $qref = ref($seek);
761	2	50				4	if ($qref)
762							{
763	0	0	0			0	$self->throw("object $qref is not a Bio::Seq or Bio::SeqFeature, or " .
			0
764							"Bio::GeneDesign::RestrictionEnzyme object")
765							unless ($seek->isa("Bio::Seq")
766							\|\| $seek->isa("Bio::SeqFeatureI")
767							\|\| $seek->isa("Bio::GeneDesign::RestrictionEnzyme"));
768	0	0				0	if ($seek->isa("Bio::GeneDesign::RestrictionEnzyme"))
769							{
770	0					0	$regarr = $seek->regex;
771							}
772							else
773							{
774	0	0				0	$query = ref($seek->seq) ? $seek->seq->seq : $seek->seq;
775	0	0				0	$regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
776							}
777							}
778							else
779							{
780	2	50				9	$regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
781							}
782
783	2					37	return _positions($base, $regarr);
784							}
785
786							=head2 parse_organisms
787
788							Returns two hash references. The first contains the names of all rscu tables.
789							The second contains the name of all codon tables.
790
791							=cut
792
793							sub parse_organisms
794							{
795	0			0	1	0	my ($self) = @_;
796	0					0	my ($rscu, $cods) = _parse_organisms($self->{codon_path});
797	0					0	return ($rscu, $cods);
798							}
799
800							=head2 set_codontable
801
802							# load a codon table from the GeneDesign configuration directory
803							$GD->set_codontable(-organism_name => "yeast");
804
805							# load a codon table from an arbitrary path and catch it in a variable
806							my $codon_t = $GD->set_codontable(-organism_name => "custom",
807							-table_path => "/path/to/table.ct");
808
809							The -organism_name argument is required.
810
811							This function loads, sets, and returns a codon definition table. After it is run
812							the accessor L will return the hash reference that
813							represents the codon table.
814
815							If no path is provided, the configuration directory /codon_tables is checked for
816							tables that match the provided organism name. Any codon table that is using a
817							non standard definition for a codon will cause a warning to be issued.
818
819							The table format for codon tables is
820
821							# Standard genetic code
822							{TTT} = F
823							{TTC} = F
824							{TTA} = L
825							...
826
827							See L
828
829							=cut
830
831							sub set_codontable
832							{
833	7			7	1	25	my ($self, @args) = @_;
834	7					27	my ($orgname, $table_path)
835							= $self->_rearrange([qw(
836							organism_name
837							table_path)], @args);
838
839	7	50				189	$self->throw("No organsim name provided") unless ($orgname);
840	7					17	$self->{organism} = $orgname;
841
842	7	50	33			156	$self->throw("$table_path does not exist")
843							if ($table_path && ! -e $table_path);
844
845	7	50				33	if (! $table_path )
846							{
847	0					0	$table_path = $self->{codon_path} . $orgname . ".ct";
848	0	0	0			0	if (-e $table_path && $orgname ne 'Standard')
849							{
850	0					0	warn "Using nonstandard codon definitions for $orgname\n";
851							}
852							else
853							{
854	0					0	$table_path = $self->{codon_path} . "Standard.ct";
855							}
856							}
857
858	7					40	$self->{codontable} = _parse_codon_file($table_path);
859	7					35	$self->{reversecodontable} = _reverse_codon_table($self->{codontable});
860	7					19	return $self->{codontable};
861							}
862
863							=head2 set_rscutable
864
865							# load a RSCU table from the GeneDesign configuration directory
866							$GD->set_rscutable(-organism_name => "yeast");
867
868							# load an RSCU table from an arbitrary path and catch it in a variable
869							my $rscu_t = $GD->set_rscutable(-organism_name => "custom",
870							-table_path => "/path/to/table.rscu");
871
872							The -organism_name argument is required.
873
874							This function loads, sets, and returns an RSCU table. After it is run
875							the accessor L will return the hash reference that
876							represents the RSCU table.
877
878							If no path is provided, the configuration directory /codon_tables is checked for
879							tables that match the provided organism name. If there is no table in that
880							directory, a warning will appear and the flat RSCU table will be used.
881
882							Any RSCU table that is missing a definition for a codon will cause a warning to
883							be issued. The table format for RSCU tables is
884
885							# Saccharomyces cerevisiae (Highly expressed genes)
886							# Nucleic Acids Res 16, 8207-8211 (1988)
887							{TTT} = 0.19
888							{TTC} = 1.81
889							{TTA} = 0.49
890							...
891
892							See L.
893
894							=cut
895
896							sub set_rscutable
897							{
898	7			7	1	24	my ($self, @args) = @_;
899	7					44	my ($orgname, $rscu_path)
900							= $self->_rearrange([qw(
901							organism_name
902							rscu_path)], @args);
903
904	7	50				216	$self->throw("No organsim name provided") unless ($orgname);
905	7					20	$self->{organism} = $orgname;
906
907	7	50	33			154	$self->throw("$rscu_path does not exist")
908							if ($rscu_path && ! -e $rscu_path);
909
910	7	50				33	if (! $rscu_path)
911							{
912	0					0	$rscu_path = $self->{codon_path} . $orgname . ".rscu";
913	0	0				0	if (! -e $rscu_path)
914							{
915	0					0	warn "No RSCU table for $orgname found. Using Unbiased values\n";
916	0					0	$rscu_path = $self->{codon_path} . "Unbiased.rscu";
917							}
918							}
919
920	7					30	$self->{rscutable} = _parse_codon_file($rscu_path);
921	7					22	return $self->{rscutable};
922							}
923
924							=head2 set_organism
925
926							# load both codon tables and RSCU tables simultaneously
927							$GD->set_organism(-organism_name => "yeast");
928
929							# with arguments
930							$GD->set_organism(-organism_name => "custom",
931							-table_path => "/path/to/table.ct",
932							-rscu_path => "/path/to/table.rscu");
933
934
935							The -organism_name argument is required.
936
937							This function is just a shortcut; it runs L and
938							L. See those functions for details.
939
940							=cut
941
942							sub set_organism
943							{
944	7			7	1	60	my ($self, @args) = @_;
945
946	7					67	my ($orgname, $table_path, $rscu_path)
947							= $self->_rearrange([qw(
948							organism_name
949							table_path
950							rscu_path)], @args);
951
952	7	50				275	$self->throw("No organsim name provided") unless ($orgname);
953	7					17	$self->{organism} = $orgname;
954
955	7					36	$self->set_codontable(-organism_name => $orgname, -table_path => $table_path);
956	7					43	$self->set_rscutable(-organism_name => $orgname, -rscu_path=> $rscu_path);
957
958	7					26	return;
959							}
960
961							=head2 codon_count
962
963							# count the codons in a list of sequences
964							my $tally = $GD->codon_count(-input => \@sequences);
965
966							# add a gene to an existing codon count
967							$tally = $GD->codon_count(-input => $sequence,
968							-count => $tally);
969
970							# add a list of Bio::Seq objects to an existing codon count
971							$tally = $GD->codon_count(-input => \@seqobjects,
972							-count => $tally);
973
974							The -input argument is required and will take a string variable, a Bio::Seq
975							object, a Bio::SeqFeatureI object, or a reference to an array full of any
976							combination of those things.
977
978							The codon_count function takes a set of sequences and counts how often each
979							codon appears in them. It returns a hash reference where the keys are upper case
980							nucleotide codons and the values are integers. If you pass a hash reference
981							containing codon counts with the -count argument, new counts will be added to
982							the old values.
983
984							This function will warn you if non nucleotide codons are found.
985
986							TODO: what about ambiguous codons?
987
988							=cut
989
990							sub codon_count
991							{
992	3			3	1	6186	my ($self, @args) = @_;
993
994	3					17	my ($input, $count) = $self->_rearrange([qw(input count)], @args);
995
996							$self->throw("no codon table has been defined")
997	3	50				55	unless $self->{codontable};
998
999	3	50				9	$self->throw("no sequences were provided to count")
1000							unless $input;
1001
1002	3					8	my $list = $self->_stripdown($input, 'ARRAY', 0);
1003
1004							my $cod_count = _codon_count($list,
1005							$self->{codontable},
1006	3					13	$count);
1007
1008	3	50				9	warn("There are bad codons in codon count\n") if (exists $cod_count->{XXX});
1009
1010	3					12	return $cod_count;
1011							}
1012
1013							=head2 generate_RSCU_table
1014
1015							my $rscu_t = $GD->generate_RSCU_table(-sequences => \@list_of_sequences);
1016
1017							The -sequences argument is required and will take a string variable, a Bio::Seq
1018							object, a Bio::SeqFeatureI object, or a reference to an array full of any
1019							combination of those things.
1020
1021							The generate_RSCU_table function takes a set of sequences, counts how often each
1022							codon appears, and returns an RSCU table as a hash reference where the keys are
1023							upper case nucleotide codons and the values are floats.
1024
1025							See L.
1026
1027							=cut
1028
1029							sub generate_RSCU_table
1030							{
1031	1			1	1	2683	my ($self, @args) = @_;
1032
1033	1					8	my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
1034
1035							$self->throw("no codon table has been defined")
1036	1	50				13	unless $self->{codontable};
1037
1038	1	50				4	$self->throw("Sequence set must be provided")
1039							unless ($seqobjs);
1040
1041							return _generate_RSCU_table(
1042							$self->codon_count(-input => $seqobjs),
1043							$self->{codontable},
1044							$self->{reversecodontable}
1045	1					5	);
1046							}
1047
1048							=head2 generate_codon_report
1049
1050							my $report = $GD->generate_codon_report(-sequences => \@list_of_sequences);
1051
1052							The report will have the format
1053
1054							TTT (F) 12800 0.74
1055							TTC (F) 21837 1.26
1056							TTA (L) 4859 0.31
1057							TTG (L) 18806 1.22
1058
1059							where the first column in each group is the codon, the second column is the one
1060							letter amino acid abbreviation in parentheses, the third column is the number of
1061							times that codon has been seen, and the fourth column is the RSCU value for that
1062							codon.
1063
1064							This report comes in a 4x4 layout, as would a standard genetic code table in a
1065							textbook.
1066
1067							NO TEST
1068
1069							=cut
1070
1071							sub generate_codon_report
1072							{
1073	0			0	1	0	my ($self, @args) = @_;
1074
1075	0					0	my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
1076
1077							$self->throw("no codon table has been defined")
1078	0	0				0	unless $self->{codontable};
1079
1080	0	0				0	$self->throw("Sequence set must be provided")
1081							unless ($seqobjs);
1082
1083	0					0	my $count_t = $self->codon_count(-input => $seqobjs);
1084
1085							my $rscu_t = _generate_RSCU_table(
1086							$count_t,
1087							$self->{codontable},
1088							$self->{reversecodontable}
1089	0					0	);
1090
1091							my $report = _generate_codon_report(
1092							$count_t,
1093							$self->{codontable},
1094	0					0	$rscu_t
1095							);
1096
1097	0					0	return $report;
1098							}
1099
1100							=head2 generate_RSCU_file
1101
1102							my $contents = $GD->generate_RSCU_file(
1103							-sequences => \@seqs,
1104							-comments => ["Got these codons from mice"]
1105							);
1106							open (my $OUT, '>', '/path/to/cods') \|\| die "can't write to /path/to/cods";
1107							print $OUT $contents;
1108							close $OUT;
1109
1110							This function generates a string that can be written to file to serve as a
1111							GeneDesign RSCU table. Provide a set of sequences and an optional array
1112							reference of comments to prepend to the file.
1113
1114							The file will have the format
1115							# Comment 1
1116							# ...
1117							# Comment n
1118							{TTT} = 0.19
1119							{TTC} = 1.81
1120							...
1121
1122							NO TEST
1123
1124							=cut
1125
1126							sub generate_RSCU_file
1127							{
1128	0			0	1	0	my ($self, @args) = @_;
1129
1130	0					0	my ($seqobjs, $comments) = $self->_rearrange([qw(sequences comments)], @args);
1131
1132							$self->throw('No codon table has been defined')
1133	0	0				0	unless $self->{codontable};
1134
1135	0	0				0	$self->throw('Sequence set must be provided')
1136							unless ($seqobjs);
1137
1138	0	0				0	$self->throw('Comment argument must be array reference')
1139							unless ref($comments) eq 'ARRAY';
1140
1141	0					0	my $count_t = $self->codon_count(-input => $seqobjs);
1142
1143							my $rscu_t = _generate_RSCU_table(
1144							$count_t,
1145							$self->{codontable},
1146							$self->{reversecodontable}
1147	0					0	);
1148
1149							my $report = _generate_codon_file(
1150							$rscu_t,
1151							$self->{reversecodontable},
1152	0					0	$comments
1153							);
1154
1155	0					0	return $report;
1156							}
1157
1158							=head2 list_enzyme_sets
1159
1160							my @available_enzlists = $GD->list_enzyme_sets();
1161							# @available_enzlists == ('standard_and_IIB', 'blunts', 'IIB', 'nonpal', ...)
1162
1163							Returns an array containing the names of every restriction enzyme recognition
1164							list GeneDesign knows about.
1165
1166							=cut
1167
1168							sub list_enzyme_sets
1169							{
1170	0			0	1	0	my ($self) = @_;
1171	0					0	my $epath = $self->{conf} . 'enzymes/';
1172	0					0	my @sets = ();
1173	0	0				0	opendir (my $ENZDIR, $epath) \|\| $self->throw("can't opendir $epath");
1174	0					0	foreach my $list (readdir($ENZDIR))
1175							{
1176	0					0	my $name = basename($list);
1177	0					0	$name =~ s{\.[^.]+$}{}x;
1178	0	0	0			0	next if ($name eq q{.} \|\| $name eq q{..});
1179	0					0	push @sets, $name;
1180							}
1181	0					0	closedir($ENZDIR);
1182	0					0	return @sets;
1183							}
1184
1185							=head2 set_restriction_enzymes
1186
1187							$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1188
1189							or
1190
1191							$GD->set_restriction_enzymes(-list_path => '/path/to/enzyme_file');
1192
1193							or even
1194
1195							$GD->set_restriction_enzymes(
1196							-list_path => '/path/to/enzyme_file',
1197							-enzyme_set => 'custom_enzymes'
1198							);
1199
1200							All will return a hash structure full of restriction enzymes.
1201
1202							Tell GeneDesign which set of restriction enzymes to use. If you provide only a
1203							set name with the -enzyme_set flag, GeneDesign will check its config path for a
1204							matching file. Otherwise you must provide a path to a file (and optionally a
1205							name for the set).
1206
1207							=cut
1208
1209							sub set_restriction_enzymes
1210							{
1211	3			3	1	530	my ($self, @args) = @_;
1212
1213	3					14	my ($set_name, $set_path)
1214							= $self->_rearrange([qw(enzyme_set list_path)], @args);
1215
1216	3					66	my $def = 'all_enzymes';
1217	3					11	my $defpath = $self->{conf} . 'enzymes/' . $def;
1218	3					16	$self->{all_enzymes} = _define_sites($defpath);
1219
1220
1221	3	50	33			48	if (! $set_name && ! $set_path)
		50	33
		50
1222							{
1223	0					0	$self->{enzyme_set} = $self->{all_enzymes};
1224	0					0	$self->{enzyme_set_name} = $def;
1225							}
1226							elsif ($set_name && ! $set_path)
1227							{
1228	0					0	$set_path = $self->{conf} . "enzymes/$set_name";
1229	0	0				0	if ( ! -e $set_path)
1230							{
1231	0					0	my $list = join q{, }, $self->list_enzyme_sets();
1232	0					0	my $message = "No enzyme set found that matches $set_name; ";
1233	0					0	$message .= "Options are ($list)\n";
1234	0					0	$self->throw($message);
1235							}
1236	0					0	my $list = _parse_enzyme_list($set_path);
1237	0					0	my $set = {};
1238	0					0	foreach my $id (@$list)
1239							{
1240	0	0				0	if (! exists $self->{all_enzymes}->{$id})
1241							{
1242	0					0	warn "$id was not recognized as an enzyme... skipping\n";
1243	0					0	next;
1244							}
1245	0					0	$set->{$id} = $self->{all_enzymes}->{$id};
1246							}
1247	0					0	$self->{enzyme_set} = $set;
1248	0					0	$self->{enzyme_set_name} = $set_name;
1249							}
1250							elsif ($set_path)
1251							{
1252	3	50				173	$self->throw("No enzyme list found at $set_path\n")
1253							unless (-e $set_path);
1254	3					72	my $list = _parse_enzyme_list($set_path);
1255	3					9	my $set = {};
1256	3					11	foreach my $id (@$list)
1257							{
1258	87	50				166	if (! exists $self->{all_enzymes}->{$id})
1259							{
1260	0					0	warn "$id was not recognized as an enzyme... skipping\n";
1261	0					0	next;
1262							}
1263	87					158	$set->{$id} = $self->{all_enzymes}->{$id};
1264							}
1265	3					8	$self->{enzyme_set} = $set;
1266	3		33			194	$self->{enzyme_set_name} = $set_name \|\| basename($set_path);
1267							}
1268	3					20	return $self->{enzyme_set};
1269							}
1270
1271							=head2 remove_from_enzyme_set
1272
1273							Removes a subset of enzymes from an enzyme list. This only happens in memory, no
1274							files will be altered. The argument is an array reference of enzyme names.
1275
1276							$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1277							$GD->remove_from_enzyme_set(-enzymes => ['SmaI', 'MlyI']);
1278
1279							NO TEST
1280
1281							=cut
1282
1283							sub remove_from_enzyme_set
1284							{
1285	0			0	1	0	my ($self, @args) = @_;
1286
1287	0					0	my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1288
1289	0	0				0	return unless ($enzes);
1290
1291							$self->throw("no enzyme set has been defined")
1292	0	0				0	unless $self->{enzyme_set};
1293
1294	0					0	my @toremove = ();
1295	0					0	my $ref = ref ($enzes);
1296	0	0				0	if ($ref eq "ARRAY")
1297							{
1298	0					0	push @toremove, @$enzes;
1299							}
1300							else
1301							{
1302	0					0	push @toremove, $enzes;
1303							}
1304	0					0	foreach my $enz (@toremove)
1305							{
1306	0					0	my $eid = $enz;
1307	0					0	my $eref = ref $enz;
1308	0	0				0	if ($eref)
1309							{
1310	0	0				0	$self->throw("object in enzymes is not a " .
1311							"Bio::GeneDesign::RestrictionEnzyme object")
1312							unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1313	0					0	$eid = $enz->id;
1314							}
1315
1316	0	0				0	if (exists $self->{enzyme_set}->{$eid})
1317							{
1318	0					0	delete $self->{enzyme_set}->{$eid};
1319							}
1320							}
1321	0					0	return;
1322							}
1323
1324							=head2 add_to_enzyme_set
1325
1326							Adds a subset of enzymes to an enzyme list. This only happens in memory, no
1327							files will be altered. The argument is an array reference of RestrictionEnzyme
1328							objects.
1329
1330							#Grab all known enzymes
1331							my $allenz = $GD->set_restriction_enzymes(-enzyme_set => 'standard_and_IIB');
1332
1333							#Pull out a few
1334							my @keepers = ($allenz->{'BmrI'}, $allenz->{'HphI'});
1335
1336							#Give GeneDesign the enzyme set you want
1337							$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1338
1339							#Add the few enzymes it didn't have before
1340							$GD->add_to_enzyme_set(-enzymes => \@keepers);
1341
1342							NO TEST
1343
1344							=cut
1345
1346							sub add_to_enzyme_set
1347							{
1348	0			0	1	0	my ($self, @args) = @_;
1349
1350	0					0	my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1351
1352	0	0				0	return unless ($enzes);
1353
1354							$self->throw("no enzyme set has been defined")
1355	0	0				0	unless $self->{enzyme_set};
1356
1357	0					0	my @toadds = ();
1358	0					0	my $ref = ref ($enzes);
1359	0	0				0	if ($ref eq "ARRAY")
1360							{
1361	0					0	push @toadds, @$enzes;
1362							}
1363							else
1364							{
1365	0					0	push @toadds, $enzes;
1366							}
1367	0					0	foreach my $enz (@toadds)
1368							{
1369	0	0				0	$self->throw("object in enzymes is not a " .
1370							"Bio::GeneDesign::RestrictionEnzyme object")
1371							unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1372
1373	0	0				0	next if (exists $self->{enzyme_set}->{$enz->id});
1374	0					0	$self->{enzyme_set}->{$enz->id} = $enz;
1375							}
1376	0					0	return;
1377							}
1378
1379							=head2 restriction_status
1380
1381							=cut
1382
1383							sub restriction_status
1384							{
1385	1			1	1	39172	my ($self, @args) = @_;
1386
1387	1					8	my ($seq) = $self->_rearrange([qw(sequence)], @args);
1388
1389							$self->throw("no enzyme set has been defined")
1390	1	50				33	unless $self->{enzyme_set};
1391
1392	1	50				3	$self->throw("No arguments provided for set_restriction_enzymes")
1393							unless ($seq);
1394
1395	1					6	my $str = $self->_stripdown($seq, q{}, 0);
1396	1					3	my @reslist = values %{$self->{enzyme_set}};
	1					5
1397	1					7	return _define_site_status($str, \@reslist);
1398							}
1399
1400							=head2 build_prefix_tree
1401
1402							Take an array reference of nucleotide sequences (they can be strings, Bio::Seq
1403							objects, or Bio::GeneDesign::RestrictionEnzyme objects) and create a suffix
1404							tree. If you add the peptide flag, the sequences will be ambiguously translated
1405							before they are added to the suffix tree. Otherwise they will be ambiguously
1406							transcribed. It will add the reverse complement of any non peptide sequence as
1407							long as the reverse complement is different.
1408
1409							my $tree = $GD->build_prefix_tree(-input => ['GGATCC']);
1410
1411							my $ptree = $GD->build_prefix_tree(
1412							-input => ['GGCCNNNNNGGCC'],
1413							-peptide => 1
1414							);
1415
1416							=cut
1417
1418							sub build_prefix_tree
1419							{
1420	2			2	1	4953	my ($self, @args) = @_;
1421
1422	2					15	my ($list, $pep) = $self->_rearrange([qw(input peptide)], @args);
1423
1424	2	50				68	$self->throw("no input provided")
1425							unless ($list);
1426
1427	2					9	my $tree = Bio::GeneDesign::PrefixTree->new();
1428
1429	2					6	foreach my $seq (@$list)
1430							{
1431	10					15	my $base = $seq;
1432	10					13	my $id = $seq;
1433	10					30	my $ref = ref($seq);
1434	10	50				17	if ($ref)
1435							{
1436	10	50	33			97	$self->throw("object in input is not a Bio::Seq, Bio::SeqFeature, or " .
			33
1437							"Bio::GeneDesign::RestrictionEnzyme object")
1438							unless ($seq->isa("Bio::Seq")
1439							\|\| $seq->isa("Bio::SeqFeatureI")
1440							\|\| $seq->isa("Bio::GeneDesign::RestrictionEnzyme")
1441							);
1442	10	50				27	$base = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
1443	10					28	$id = $seq->id;
1444							}
1445
1446	10	100				23	if ($pep)
1447							{
1448							$self->throw('No codon table has been defined')
1449	4	50				10	unless $self->{codontable};
1450
1451							my @fpeptides = _amb_translation($base, $self->{codontable},
1452	4					16	't', $self->{amb_trans_memo});
1453	4					19	$tree->add_prefix($_, $id, $base) foreach (@fpeptides);
1454
1455	4					12	my $esab = _complement($base, 1);
1456	4					7	my $lagcheck = $esab;
1457	4					14	while (substr($lagcheck, -1) eq "N")
1458							{
1459	0					0	$lagcheck = substr($lagcheck, 0, length($lagcheck) - 1);
1460							}
1461	4	100	66			23	if ($esab ne $base && $lagcheck eq $esab)
1462							{
1463							my @rpeptides = _amb_translation($esab, $self->{codontable},
1464	2					9	't', $self->{amb_trans_memo});
1465	2					10	$tree->add_prefix($_, $id, $esab) foreach (@rpeptides);
1466							}
1467							}
1468							else
1469							{
1470	6					18	my $fnucs = _amb_transcription($base);
1471	6					20	$tree->add_prefix($_, $id, undef) foreach (@$fnucs);
1472
1473	6					15	my $esab = _complement($base, 1);
1474	6	100				21	if ($esab ne $base)
1475							{
1476	3					8	my $rnucs = _amb_transcription($esab);
1477	3					8	$tree->add_prefix($_, $id, undef) foreach (@$rnucs);
1478							}
1479							}
1480							}
1481	2					316	return $tree;
1482							}
1483
1484							=head2 search_prefix_tree
1485
1486							Takes a suffix tree and a sequence and searches for results, which are returned
1487							as in the Bio::GeneDesign::PrefixTree documentation.
1488
1489							my $hits = $GD->search_prefix_tree(-tree => $ptree, -sequence => $mygeneseq);
1490
1491							# @$hits = (['BamHI', 4, 'GGATCC', 'i hope this didn't pop up'],
1492							# ['OhnoI', 21, 'GGCCC', 'I hope these pop up'],
1493							# ['WoopsII', 21, 'GGCCC', 'I hope these pop up']
1494							#);
1495
1496							=cut
1497
1498							sub search_prefix_tree
1499							{
1500	2			2	1	125115	my ($self, @args) = @_;
1501
1502	2					17	my ($tree, $seq) = $self->_rearrange([qw(tree sequence)], @args);
1503
1504	2	50				76	$self->throw("no query sequence provided")
1505							unless ($seq);
1506
1507	2	50				7	$self->throw("no suffix tree provided")
1508							unless ($tree);
1509
1510	2	50				15	$self->throw("tree is not a Bio::GeneDesign::PrefixTree")
1511							unless ($tree->isa("Bio::GeneDesign::PrefixTree"));
1512
1513	2					11	my $str = $self->_stripdown($seq, q{}, 0);
1514
1515	2					11	my @hits = $tree->find_prefixes($str);
1516
1517	2					36	return \@hits;
1518							}
1519
1520							=head2 pattern_aligner
1521
1522							=cut
1523
1524							sub pattern_aligner
1525							{
1526	3			3	1	7640	my ($self, @args) = @_;
1527
1528	3					17	my ($seq, $pattern, $peptide, $re)
1529							= $self->_rearrange([qw(sequence pattern peptide offset)], @args);
1530
1531							$self->throw("no codon table has been defined")
1532	3	50				116	unless $self->{codontable};
1533
1534	3	50				8	$self->throw("no nucleotide sequence provided")
1535							unless $seq;
1536
1537	3		100			10	$re = $re \|\| 0;
1538
1539	3					10	my $str = $self->_stripdown($seq, q{}, 1);
1540
1541	3		33			16	$peptide = $peptide \|\| $self->translate(-sequence => $str);
1542
1543							my ($newpattern, $offset) = _pattern_aligner($str,
1544							$pattern,
1545							$peptide,
1546							$self->{codontable},
1547							$self->{amb_trans_memo}
1548	3					12	);
1549	3	100				17	return $re ? ($newpattern, $offset) : $newpattern;
1550							}
1551
1552							=head2 pattern_adder
1553
1554							=cut
1555
1556							sub pattern_adder
1557							{
1558	1			1	1	3016	my ($self, @args) = @_;
1559
1560	1					8	my ($seq, $pattern) = $self->_rearrange([qw(sequence pattern)], @args);
1561
1562							$self->throw("no codon table has been defined")
1563	1	50				39	unless $self->{codontable};
1564
1565	1	50				4	$self->throw("no nucleotide sequence provided")
1566							unless $seq;
1567
1568	1					5	my $str = $self->_stripdown($seq, q{}, 1);
1569	1					5	my $pat = $self->_stripdown($pattern, q{}, 1);
1570
1571							my $newsequence = _pattern_adder($str,
1572							$pat,
1573							$self->{codontable},
1574							$self->{reversecodontable},
1575							$self->{amb_trans_memo}
1576	1					7	);
1577	1					5	return $newsequence;
1578							}
1579
1580							=head2 codon_change_type
1581
1582							=cut
1583
1584							sub codon_change_type
1585							{
1586	0			0	1	0	my ($self, @args) = @_;
1587
1588	0					0	my ($codold, $codnew) = $self->_rearrange([qw(from to)], @args);
1589
1590							$self->throw("no codon table has been defined")
1591	0	0				0	unless $self->{codontable};
1592
1593	0	0				0	$self->throw("no from sequence provided")
1594							unless $codold;
1595
1596	0	0				0	$self->throw("no to sequence provided")
1597							unless $codnew;
1598
1599	0					0	my $changetype = _codon_change_type($codold, $codnew, $self->{codontable});
1600	0					0	return $changetype;
1601							}
1602
1603							=head2 translate
1604
1605							=cut
1606
1607							sub translate
1608							{
1609	9			9	1	6046	my ($self, @args) = @_;
1610
1611	9					32	my ($seq, $frame) = $self->_rearrange([qw(sequence frame)], @args);
1612
1613							$self->throw("no codon table has been defined")
1614	9	50				209	unless $self->{codontable};
1615
1616	9	50				16	$self->throw("no nucleotide sequence provided")
1617							unless $seq;
1618
1619	9					30	my $str = $self->_stripdown($seq, q{}, 1);
1620
1621	9	100				16	$frame = $frame ? $frame : 1;
1622
1623	9	50	33			40	$self->throw("frame must be -3, -2, -1, 1, 2, or 3")
1624							if (abs($frame) > 4 \|\| abs($frame) < 0);
1625
1626	9					30	my $peptide = _translate($str, $frame, $self->{codontable});
1627	9	100				19	if (ref $seq)
1628							{
1629	6					22	my $newobj = $seq->clone();
1630	6	50				183	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1631	6					74	$desc = "translated with " . $self->{organism} . " codon values";
1632	6					17	$newobj->seq($peptide);
1633	6					384	$newobj->alphabet("protein");
1634	6					87	$newobj->desc($desc);
1635	6					55	return $newobj;
1636							}
1637							else
1638							{
1639	3					12	return $peptide;
1640							}
1641							}
1642
1643							=head2 reverse_translate_algorithms
1644
1645							=cut
1646
1647							sub reverse_translate_algorithms
1648							{
1649	0			0	1	0	return Bio::GeneDesign::ReverseTranslate::_list_algorithms();
1650							}
1651
1652							=head2 reverse_translate
1653
1654							=cut
1655
1656							sub reverse_translate
1657							{
1658	20001			20001	1	2393998	my ($self, @args) = @_;
1659
1660	20001					63003	my ($pep, $algorithm)
1661							= $self->_rearrange([qw(
1662							peptide
1663							algorithm)], @args);
1664
1665							$self->throw("no codon table has been defined")
1666	20001	50				502277	unless $self->{codontable};
1667
1668							$self->throw("no RSCU table has been defined")
1669	20001	50				41264	unless $self->{rscutable};
1670
1671	20001	50				38870	$self->throw("no peptide sequence provided")
1672							unless $pep;
1673
1674	20001					44435	my $str = $self->_stripdown($pep, q{}, 1);
1675
1676	20001					59401	my ($newstr, @baddies) = _sanitize($str, 'pep');
1677	20001					34973	$str = $newstr;
1678	20001	50				38376	if (scalar @baddies)
1679							{
1680	0					0	print "\nGD_WARNING: removed bad characters (", join q{, }, @baddies;
1681	0					0	print ") from input sequence\n";
1682							}
1683	20001		50			36889	$algorithm = $algorithm \|\| "balanced";
1684	20001					32326	$algorithm = lc $algorithm;
1685	20001					40350	$algorithm =~ s{\;}{}xg;
1686	20001					34534	my $name = "_reversetranslate" . "_" . $algorithm;
1687	20001					53835	my $subref = \&$name;
1688	20001		33			59609	my $seq = &$subref($self->{reversecodontable}, $self->{rscutable}, $str)
1689							\|\| $self->throw("Can't reverse translate with $algorithm? $!");
1690
1691	20001	50				50931	if (ref $pep)
1692							{
1693	20001					54489	my $newobj = $pep->clone();
1694	20001	50				598532	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1695	20001					269345	$desc .= "$algorithm reverse translated with " . $self->{organism};
1696	20001					32014	$desc .= " RSCU values";
1697	20001					46199	$newobj->seq($seq);
1698	20001					1556640	$newobj->desc($desc);
1699	20001					216864	my $CDS = Bio::SeqFeature::Generic->new
1700							(
1701							-primary => 'CDS',
1702							-start => 1,
1703							-end => length $seq,
1704							-tag => {
1705							label => $newobj->id . '_CDS'
1706							},
1707							);
1708	20001	50				4756265	$newobj->add_SeqFeature($CDS) \|\| $self->throw("Cannot add CDS");
1709	20001					796497	return $newobj;
1710							}
1711							else
1712							{
1713	0					0	return $seq;
1714							}
1715							}
1716
1717							=head2 codon_juggle_algorithms
1718
1719							=cut
1720
1721							sub codon_juggle_algorithms
1722							{
1723	0			0	1	0	return Bio::GeneDesign::CodonJuggle::_list_algorithms();
1724							}
1725
1726							=head2 codon_juggle
1727
1728							=cut
1729
1730							sub codon_juggle
1731							{
1732	20003			20003	1	687692	my ($self, @args) = @_;
1733
1734	20003					56434	my ($seq, $algorithm)
1735							= $self->_rearrange([qw(
1736							sequence
1737							algorithm)], @args);
1738
1739							$self->throw("no codon table has been defined")
1740	20003	50				396189	unless $self->{codontable};
1741
1742							$self->throw("no RSCU table has been defined")
1743	20003	50				32389	unless $self->{rscutable};
1744
1745	20003	50				30907	$self->throw("no nucleotide sequence provided")
1746							unless $seq;
1747
1748	20003	50				27626	$self->throw("no algorithm provided")
1749							unless $algorithm;
1750
1751	20003					36930	my $str = $self->_stripdown($seq, q{}, 0);
1752
1753							##REPLACE THIS WITH CODE THAT GRACEFULLY JUGGLES JUST CDSES OR GENES
1754	20003	50				36644	$self->throw("sequence does not appear to be the right length to be a gene")
1755							unless length($str) % 3 == 0;
1756
1757	20003					26899	$algorithm = lc $algorithm;
1758	20003					35934	$algorithm =~ s/\W//xg;
1759	20003					29904	my $name = "_codonJuggle_" . $algorithm;
1760	20003					38991	my $subref = \&$name;
1761							my $newseq = &$subref($self->{codontable},
1762							$self->{reversecodontable},
1763							$self->{rscutable},
1764	20003		33			48912	$str
1765							) \|\| $self->throw("Can't run $algorithm? $!");
1766	20003	50				39948	if (ref $seq)
1767							{
1768	20003					42686	my $newobj = $seq->clone();
1769	20003	50				468520	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1770	20003					221761	$desc .= "$algorithm codon juggled with " . $self->{organism};
1771	20003					26560	$desc .= " RSCU values";
1772	20003					41684	$newobj->seq($newseq);
1773	20003					1244626	$newobj->desc($desc);
1774	20003					184508	return $newobj;
1775							}
1776							else
1777							{
1778	0					0	return $newseq;
1779							}
1780							}
1781
1782							=head2 subtract_sequence
1783
1784							=cut
1785
1786							sub subtract_sequence
1787							{
1788	4			4	1	3305	my ($self, @args) = @_;
1789
1790	4					17	my ($seq, $rem) = $self->_rearrange([qw(sequence remove)], @args);
1791
1792							$self->throw("no codon table has been defined")
1793	4	50				81	unless $self->{codontable};
1794
1795							$self->throw("no RSCU table has been defined")
1796	4	50				11	unless $self->{rscutable};
1797
1798	4	50				10	$self->throw("no nucleotide sequence provided")
1799							unless $seq;
1800
1801	4	50				19	$self->throw("no sequence to be removed was defined")
1802							unless ($rem);
1803
1804	4					15	my $str = $self->_stripdown($seq, q{}, 0);
1805
1806	4					7	my $regarr;
1807	4					8	my $less = $rem;
1808	4	50				10	if (ref($rem))
1809							{
1810	4	100	66			28	if ($rem->isa("Bio::Seq") \|\| $rem->isa("Bio::SeqFeatureI"))
		50
1811							{
1812	3	50				16	$less = ref($rem->seq) ? $rem->seq->seq : $rem->seq;
1813	3					74	$regarr = _regarr($less, 1);
1814							}
1815							elsif ($rem->isa("Bio::GeneDesign::RestrictionEnzyme"))
1816							{
1817	1					5	$less = $rem->seq;
1818	1					5	$regarr = $rem->regex;
1819							}
1820							else
1821							{
1822	0					0	$self->throw("removal argument is not a Bio::Seq, Bio::SeqFeature, or "
1823							. "Bio::GeneDesign::RestrictionEnzyme");
1824							}
1825							}
1826							else
1827							{
1828	0					0	$regarr = _regarr($less, 1);
1829							}
1830
1831							my $newseq = _subtract( uc $str,
1832							uc $less,
1833							$regarr,
1834							$self->{codontable},
1835							$self->{rscutable},
1836							$self->{reversecodontable}
1837	4					26	);
1838
1839	4	50				14	if (ref $seq)
1840							{
1841	4					20	my $newobj = $seq->clone();
1842	4	50				161	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1843	4					64	$desc .= $rem->id . " subtracted with " . $self->{organism};
1844	4					50	$desc .= " RSCU values";
1845	4					12	$newobj->seq($newseq);
1846	4					311	$newobj->desc($desc);
1847	4					51	return $newobj;
1848							}
1849							else
1850							{
1851	0					0	return $newseq;
1852							}
1853							}
1854
1855							=head2 repeat_smash
1856
1857							=cut
1858
1859							sub repeat_smash
1860							{
1861	0			0	1	0	my ($self, @args) = @_;
1862
1863	0					0	my ($seq) = $self->_rearrange([qw(sequence)], @args);
1864
1865							$self->throw("no codon table has been defined")
1866	0	0				0	unless $self->{codontable};
1867
1868							$self->throw("no RSCU table has been defined")
1869	0	0				0	unless $self->{rscutable};
1870
1871	0	0				0	$self->throw("no nucleotide sequence provided")
1872							unless $seq;
1873
1874	0					0	my $str = $self->_stripdown($seq, q{}, 0);
1875
1876
1877							my $newseq = _minimize_local_alignment_dp
1878							(
1879							$str,
1880							$self->{codontable},
1881							$self->{reversecodontable},
1882							$self->{rscutable}
1883	0					0	);
1884	0	0				0	if (ref $seq)
1885							{
1886	0					0	my $newobj = $seq->clone();
1887	0	0				0	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1888	0					0	$desc .= $seq->id . " repeat smashed with " . $self->{organism};
1889	0					0	$desc .= " RSCU values";
1890	0					0	$newobj->seq($newseq);
1891	0					0	$newobj->desc($desc);
1892	0					0	return $newobj;
1893							}
1894							else
1895							{
1896	0					0	return $newseq;
1897							}
1898							}
1899
1900							=head2 make_amplification_primers
1901
1902							NO TEST
1903
1904							=cut
1905
1906							sub make_amplification_primers
1907							{
1908	0			0	1	0	my ($self, @args) = @_;
1909
1910	0					0	my ($seq, $temp) = $self->_rearrange([qw(sequence temperature)], @args);
1911
1912	0	0				0	$self->throw("no sequence provided") unless ($seq);
1913	0		0			0	$temp = $temp \|\| 60;
1914
1915	0					0	my $str = $self->_stripdown($seq, q{}, 0);
1916
1917	0					0	return _make_amplification_primers($str, $temp);
1918							}
1919
1920							=head2 contains_homopolymer
1921
1922							Returns 1 if the sequence contains a homopolymer of the provided length (default
1923							is 5bp) and 0 else
1924
1925							=cut
1926
1927							sub contains_homopolymer
1928							{
1929	0			0	1	0	my ($self, @args) = @_;
1930
1931	0					0	my ($seq, $length) = $self->_rearrange([qw(sequence length)], @args);
1932
1933	0	0				0	$self->throw("no sequence provided") unless ($seq);
1934	0		0			0	$length = $length \|\| 5;
1935
1936	0					0	my $str = $self->_stripdown($seq, q{}, 0);
1937
1938	0					0	return _check_for_homopolymer($str, $length);
1939							}
1940
1941							=head2 filter_homopolymers
1942
1943							=cut
1944
1945							sub filter_homopolymers
1946							{
1947	0			0	1	0	my ($self, @args) = @_;
1948
1949	0					0	my ($seqs, $length) = $self->_rearrange([qw(sequences length)], @args);
1950
1951	0	0				0	$self->throw("no argument provided to filter_homopolymers")
1952							unless $seqs;
1953	0		0			0	$length = $length \|\| 5;
1954
1955	0					0	my $arrref = $self->_stripdown($seqs, 'ARRAY', 1);
1956
1957	0					0	my $seqarr = _filter_homopolymer( $arrref, $length);
1958	0					0	return $seqarr;
1959							}
1960
1961							=head2 find_runs
1962
1963							=cut
1964
1965							sub find_runs
1966							{
1967	0			0	1	0	my ($self, @args) = @_;
1968
1969	0					0	my ($seq, $pattern, $min) = $self->_rearrange([qw(
1970							sequence pattern minimum)], @args);
1971
1972	0	0				0	if (! $pattern)
1973							{
1974	0					0	$self->throw("no pattern argument provided to find_runs");
1975							}
1976	0		0			0	$min = $min \|\| 5;
1977
1978	0					0	return _find_runs($seq, $pattern, $min);
1979							}
1980
1981							=head2 make_graph
1982
1983							=cut
1984
1985							sub make_graph
1986							{
1987	0			0	1	0	my ($self, @args) = @_;
1988
1989							$self->throw("Graphing is not available")
1990	0	0				0	unless $self->{graph};
1991
1992	0					0	my ($seqobjs, $window)
1993							= $self->_rearrange([qw(sequences window)], @args);
1994
1995							$self->throw("no codon table has been defined")
1996	0	0				0	unless $self->{codontable};
1997
1998							$self->throw("no RSCU table has been defined")
1999	0	0				0	unless $self->{rscutable};
2000
2001	0	0				0	$self->throw("no nucleotide sequences provided")
2002							unless $seqobjs;
2003
2004	0	0				0	$self->throw("sequences argument is not an array reference")
2005							unless ref($seqobjs) eq "ARRAY";
2006
2007	0					0	foreach my $seqobj (@$seqobjs)
2008							{
2009	0	0				0	$self->throw(ref($seqobj) . " is not a Bio::Seq object")
2010							unless $seqobj->isa("Bio::Seq");
2011
2012	0	0				0	$self->throw("$seqobj is not a nucleotide sequence")
2013							unless $seqobj->alphabet eq "dna";
2014							}
2015
2016							my ($graph, $format) = _make_graph( $seqobjs,
2017							$window,
2018							$self->{organism},
2019							$self->{codontable},
2020							$self->{rscutable},
2021	0					0	$self->{reversecodontable});
2022	0					0	return ($graph, $format);
2023							}
2024
2025							=head2 make_dotplot
2026
2027							=cut
2028
2029							sub make_dotplot
2030							{
2031	0			0	1	0	my ($self, @args) = @_;
2032
2033							$self->throw("Graphing is not available")
2034	0	0				0	unless $self->{graph};
2035
2036	0					0	my ($seq1, $seq2, $window, $stringency)
2037							= $self->_rearrange([qw(first second window stringency)], @args);
2038
2039	0		0			0	$window = $window \|\| 10;
2040	0		0			0	$stringency = $stringency \|\| 10;
2041
2042	0	0	0			0	$self->throw("no nucleotide sequences provided")
2043							unless ($seq1 && $seq2);
2044
2045	0					0	foreach my $seqobj ($seq1, $seq2)
2046							{
2047	0	0				0	$self->throw(ref($seqobj) . " is not a Bio::Seq object")
2048							unless $seqobj->isa("Bio::Seq");
2049
2050	0	0				0	$self->throw("$seqobj is not a nucleotide sequence")
2051							unless $seqobj->alphabet eq "dna";
2052							}
2053
2054	0					0	my $graph = _dotplot(
2055							$seq1->seq,
2056							$seq2->seq,
2057							$window,
2058							$stringency
2059							);
2060	0					0	return $graph;
2061							}
2062
2063							=head2 import_seqs
2064
2065							NO TEST
2066
2067							=cut
2068
2069							sub import_seqs
2070							{
2071	1			1	1	376	my ($self, $path) = @_;
2072	1	50				21	$self->throw("$path does not exist.") if (! -e $path);
2073	1					7	my ($iterator, $filename, $suffix) = _import_sequences($path);
2074	1					4	return ($iterator, $filename, $suffix);
2075							}
2076							=head2 import_seq_from_string
2077
2078							NO TEST
2079
2080							=cut
2081
2082							sub import_seq_from_string
2083							{
2084	0			0	0	0	my ($self, $string) = @_;
2085	0					0	my ($iterator, $filename, $suffix) = _import_sequences_from_string($string);
2086	0					0	return ($iterator, $filename, $suffix);
2087							}
2088
2089							=head2 export_formats
2090
2091							Export formats that have been tried and tested to work well.
2092
2093							=cut
2094
2095							sub export_formats
2096							{
2097	0			0	1	0	return Bio::GeneDesign::IO::_export_formats();
2098							}
2099
2100							=head2 export_seqs
2101
2102							NO TEST
2103
2104							=cut
2105
2106							sub export_seqs
2107							{
2108	0			0	1	0	my ($self, @args) = @_;
2109
2110	0					0	my ($outpath, $outformat, $seqarr)
2111							= $self->_rearrange([qw(
2112							filepath
2113							format
2114							sequences)], @args);
2115
2116	0	0				0	$outformat = $outformat ? $outformat : 'genbank';
2117	0	0				0	$self->throw("$outformat is not a format recognized by BioPerl")
2118							if (! _isa_BP_format($outformat));
2119
2120							#Long attributes that come in from a genbank file will have corruption
2121							#remove spaces and reattribute to fix bbs in genbank file ):
2122	0	0				0	_long_att_fix($seqarr) if ($outformat eq 'genbank');
2123
2124	0					0	return _export_sequences($outpath, $outformat, $seqarr);
2125							}
2126
2127							=head2 random_dna
2128
2129							=cut
2130
2131							sub random_dna
2132							{
2133	30000			30000	1	149009	my ($self, @args) = @_;
2134
2135	30000					61811	my ($rlen, $rgc, $rstop)
2136							= $self->_rearrange([qw(
2137							length
2138							gc_percentage
2139							no_stops)], @args);
2140
2141							$self->throw("no codon table has been defined")
2142	30000	50	33			503282	if ($rstop && ! $self->{codontable});
2143
2144	30000		100			49969	$rgc = $rgc \|\| 50;
2145	30000	50	33			87887	$self->throw("gc_percentage must be between 0 and 100")
			33
2146							if ($rgc && ($rgc < 0 \|\| $rgc > 100));
2147
2148	30000	50	33			75979	if (! $rlen \|\| $rlen < 1)
		50
2149							{
2150	0					0	return q{};
2151							}
2152							elsif ($rlen == 1)
2153							{
2154	30000	50				56506	return $rgc ? _randombase_weighted($rgc) : _randombase;
2155							}
2156	0					0	return _randomDNA($rlen, $rgc, $rstop, $self->{codontable});
2157							}
2158
2159							=head2 replace_ambiguous_bases
2160
2161							=cut
2162
2163							sub replace_ambiguous_bases
2164							{
2165	3			3	1	18	my ($self, $seq) = @_;
2166
2167	3	50				7	$self->throw("no sequence provided ")
2168							unless ($seq);
2169
2170	3					13	my $str = $self->_stripdown($seq, q{}, 1);
2171
2172	3					11	my $newstr = _replace_ambiguous_bases($str);
2173
2174	3	50				8	if (ref $seq)
2175							{
2176	0					0	my $newobj = $seq->clone();
2177	0	0				0	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
2178	0					0	$desc .= "deambiguated";
2179	0					0	$newobj->seq($newstr);
2180	0					0	$newobj->desc($desc);
2181	0					0	return $newobj;
2182							}
2183							else
2184							{
2185	3					7	return $newstr;
2186							}
2187							}
2188
2189							=head1 PLEASANTRIES
2190
2191							=head2 pad
2192
2193							my $name = 5;
2194							my $nice = $GD->pad($name, 3);
2195							$nice == "005" \|\| die;
2196
2197							$name = "oligo";
2198							$nice = $GD->pad($name, 7, "_");
2199							$nice == "__oligo" \|\| die;
2200
2201							Pads an integer with leading zeroes (by default) or any provided set of
2202							characters. This is useful both to make reports pretty and to standardize the
2203							length of designations.
2204
2205							=cut
2206
2207							sub pad
2208							{
2209	0			0	1	0	my ($self, $num, $thickness, $chars) = @_;
2210	0					0	my $t = $num;
2211	0		0			0	$chars = $chars \|\| "0";
2212	0					0	$t = $chars . $t while (length($t) < $thickness);
2213	0					0	return $t;
2214							}
2215
2216							=head2 attitude
2217
2218							my $adverb = $GD->attitude();
2219
2220							Ask GeneDesign how it handled your request.
2221
2222							=cut
2223
2224							sub attitude
2225							{
2226	0			0	1	0	my @adverbs = qw(Elegantly Energetically Enthusiastically Excitedly Daintily
2227							Deliberately Diligently Dreamily Courageously Cooly Cleverly Cheerfully
2228							Carefully Calmly Briskly Blindly Bashfully Absentmindedly Awkwardly
2229							Faithfully Ferociously Fervently Fiercely Fondly Gently Gleefully Gratefully
2230							Gracefully Happily Helpfully Heroically Honestly Joyfully Jubilantly
2231							Jovially Keenly Kindly Knowingly Kookily Loftily Lovingly Loyally
2232							Majestically Mechanically Merrily Mostly Neatly Nicely Obediently Officially
2233							Optimistically Patiently Perfectly Playfully Positively Powerfully
2234							Punctually Properly Promptly Quaintly Quickly Quirkily Rapidly Readily
2235							Reassuringly Righteously Sedately Seriously Sharply Shyly Silently Smoothly
2236							Solemnly Speedily Strictly Successfully Suddenly Sweetly Swiftly Tenderly
2237							Thankfully Throroughly Thoughtfully Triumphantly Ultimately Unabashedly
2238							Utterly Upliftingly Urgently Usefully Valiantly Victoriously Vivaciously
2239							Warmly Wholly Wisely Wonderfully Yawningly Zealously Zestfully
2240							);
2241	0					0	my $index = _random_index(scalar(@adverbs));
2242	0					0	return $adverbs[$index];
2243							}
2244
2245							=head2 endslash
2246
2247							=cut
2248
2249							sub endslash
2250							{
2251	0			0	1	0	my ($self, $path) = @_;
2252	0	0				0	if ((substr $path, -1, 1) ne q{/})
2253							{
2254	0					0	$path .= q{/};
2255							}
2256	0					0	return $path;
2257							}
2258
2259							=head2 _stripdown
2260
2261							=cut
2262
2263							sub _stripdown
2264							{
2265	40063			40063		65804	my ($self, $seqarg, $type, $enz_allowed) = @_;
2266
2267	40063		100			88566	$enz_allowed = $enz_allowed \|\| 0;
2268	40063	100				90632	my @seqs = ref $seqarg eq 'ARRAY' ? @$seqarg : ($seqarg);
2269	40063					50476	my @list;
2270	40063					62089	foreach my $seq (@seqs)
2271							{
2272	40063					52913	my $str = $seq;
2273	40063					55333	my $ref = ref $seq;
2274	40063	100				65914	if ($ref)
2275							{
2276	40028					73365	my $bit = $self->_checkref($seq, $enz_allowed);
2277	40028	50				78889	$self->throw("object $ref is not a compatible object $bit") if ($bit < 1);
2278	40028	50				80507	$str = ref $seq->seq ? $seq->seq->seq : $seq->seq;
2279							}
2280	40063					833833	push @list, uc $str;
2281							}
2282	40063	100				76669	return \@list if ($type eq 'ARRAY');
2283	40060					78580	return $list[0];
2284							}
2285
2286							=head2 _checkref
2287
2288							=cut
2289
2290							sub _checkref
2291							{
2292	40028			40028		60430	my ($self, $pobj, $enz_allowed) = @_;
2293	40028					51913	my $ref = ref $pobj;
2294	40028	50				67021	return -1 if (! $ref);
2295	40028		100			72644	$enz_allowed = $enz_allowed \|\| 0;
2296	40028					55675	my ($bioseq, $bioseqfeat) = (0, 0);
2297	40028					84711	$bioseq = $pobj->isa("Bio::Seq");
2298	40028					103258	$bioseqfeat = $pobj->isa("Bio::SeqFeatureI");
2299	40028	100				67272	if ($enz_allowed)
2300							{
2301	20016					47310	$enz_allowed = $pobj->isa("Bio::GeneDesign::RestrictionEnzyme");
2302							}
2303	40028					70439	return $bioseq + $bioseqfeat + $enz_allowed;
2304							}
2305
2306							1;
2307
2308							__END__