File Coverage

Bio/Tools/CodonTable.pm
Criterion Covered Total %
statement 225 239 94.1
branch 75 92 81.5
condition 25 41 60.9
subroutine 24 25 96.0
pod 14 14 100.0
total 363 411 88.3


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::Tools::CodonTable
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::CodonTable - Codon table object
17              
18             =head1 SYNOPSIS
19              
20             # This is a read-only class for all known codon tables. The IDs are
21             # the ones used by nucleotide sequence databases. All common IUPAC
22             # ambiguity codes for DNA, RNA and amino acids are recognized.
23              
24             use Bio::Tools::CodonTable;
25              
26             # defaults to ID 1 "Standard"
27             $myCodonTable = Bio::Tools::CodonTable->new();
28             $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
29              
30             # change codon table
31             $myCodonTable->id(5);
32              
33             # examine codon table
34             print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
35             "is:", $myCodonTable->name(), "\n");
36              
37             # print possible codon tables
38             $tables = Bio::Tools::CodonTable->tables;
39             while ( ($id,$name) = each %{$tables} ) {
40             print "$id = $name\n";
41             }
42              
43             # translate a codon
44             $aa = $myCodonTable->translate('ACU');
45             $aa = $myCodonTable->translate('act');
46             $aa = $myCodonTable->translate('ytr');
47              
48             # reverse translate an amino acid
49             @codons = $myCodonTable->revtranslate('A');
50             @codons = $myCodonTable->revtranslate('Ser');
51             @codons = $myCodonTable->revtranslate('Glx');
52             @codons = $myCodonTable->revtranslate('cYS', 'rna');
53              
54             # reverse translate an entire amino acid sequence into a IUPAC
55             # nucleotide string
56              
57             my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
58             my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
59              
60             # boolean tests
61             print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
62             print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
63             print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
64              
65             =head1 DESCRIPTION
66              
67             Codon tables are also called translation tables or genetic codes
68             since that is what they represent. A bit more complete picture
69             of the full complexity of codon usage in various taxonomic groups
70             is presented at the NCBI Genetic Codes Home page.
71              
72             CodonTable is a BioPerl class that knows all current translation
73             tables that are used by primary nucleotide sequence databases
74             (GenBank, EMBL and DDBJ). It provides methods to output information
75             about tables and relationships between codons and amino acids.
76              
77             This class and its methods recognized all common IUPAC ambiguity codes
78             for DNA, RNA and animo acids. The translation method follows the
79             conventions in EMBL and TREMBL databases.
80              
81             It is a nuisance to separate RNA and cDNA representations of nucleic
82             acid transcripts. The CodonTable object accepts codons of both type as
83             input and allows the user to set the mode for output when reverse
84             translating. Its default for output is DNA.
85              
86             Note:
87              
88             This class deals primarily with individual codons and amino
89             acids. However in the interest of speed you can L
90             longer sequence, too. The full complexity of protein translation
91             is tackled by L.
92              
93              
94             The amino acid codes are IUPAC recommendations for common amino acids:
95              
96             A Ala Alanine
97             R Arg Arginine
98             N Asn Asparagine
99             D Asp Aspartic acid
100             C Cys Cysteine
101             Q Gln Glutamine
102             E Glu Glutamic acid
103             G Gly Glycine
104             H His Histidine
105             I Ile Isoleucine
106             L Leu Leucine
107             K Lys Lysine
108             M Met Methionine
109             F Phe Phenylalanine
110             P Pro Proline
111             O Pyl Pyrrolysine (22nd amino acid)
112             U Sec Selenocysteine (21st amino acid)
113             S Ser Serine
114             T Thr Threonine
115             W Trp Tryptophan
116             Y Tyr Tyrosine
117             V Val Valine
118             B Asx Aspartic acid or Asparagine
119             Z Glx Glutamine or Glutamic acid
120             J Xle Isoleucine or Valine (mass spec ambiguity)
121             X Xaa Any or unknown amino acid
122              
123              
124             It is worth noting that, "Bacterial" codon table no. 11 produces an
125             polypeptide that is, confusingly, identical to the standard one. The
126             only differences are in available initiator codons.
127              
128              
129             NCBI Genetic Codes home page:
130             (Last update of the Genetic Codes: April 30, 2013)
131             https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
132              
133             ASN.1 version with ids 1 to 25 is at:
134             ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
135              
136             Thanks to Matteo diTomasso for the original Perl implementation
137             of these tables.
138              
139             =head1 FEEDBACK
140              
141             =head2 Mailing Lists
142              
143             User feedback is an integral part of the evolution of this and other
144             Bioperl modules. Send your comments and suggestions preferably to the
145             Bioperl mailing lists Your participation is much appreciated.
146              
147             bioperl-l@bioperl.org - General discussion
148             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
149              
150             =head2 Support
151              
152             Please direct usage questions or support issues to the mailing list:
153              
154             I
155              
156             rather than to the module maintainer directly. Many experienced and
157             reponsive experts will be able look at the problem and quickly
158             address it. Please include a thorough description of the problem
159             with code and data examples if at all possible.
160              
161             =head2 Reporting Bugs
162              
163             Report bugs to the Bioperl bug tracking system to help us keep track
164             the bugs and their resolution. Bug reports can be submitted via the
165             web:
166              
167             https://github.com/bioperl/bioperl-live/issues
168              
169             =head1 AUTHOR - Heikki Lehvaslaiho
170              
171             Email: heikki-at-bioperl-dot-org
172              
173             =head1 APPENDIX
174              
175             The rest of the documentation details each of the object
176             methods. Internal methods are usually preceded with a _
177              
178             =cut
179              
180             # Let the code begin...
181              
182             package Bio::Tools::CodonTable;
183 203         17496 use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
184 203     203   1786 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
  203         246  
185 203     203   693 use strict;
  203         238  
  203         3582  
186              
187             # Object preamble - inherits from Bio::Root::Root
188 203     203   60256 use Bio::Tools::IUPAC;
  203         320  
  203         4615  
189 203     203   89367 use Bio::SeqUtils;
  203         357  
  203         5769  
190              
191 203     203   1097 use base qw(Bio::Root::Root);
  203         213  
  203         14206  
192              
193              
194             # first set internal values for all translation tables
195              
196             BEGIN {
197 203     203   858 use constant CODONSIZE => 3;
  203         244  
  203         54678  
198 203     203   415 $GAP = '-';
199 203         525 $CODONGAP = $GAP x CODONSIZE;
200              
201 203         887 @NAMES = #id
202             (
203             'Strict', #0, special option for ATG-only start
204             'Standard', #1
205             'Vertebrate Mitochondrial',#2
206             'Yeast Mitochondrial',# 3
207             'Mold, Protozoan, and Coelenterate Mitochondrial and Mycoplasma/Spiroplasma',#4
208             'Invertebrate Mitochondrial',#5
209             'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
210             '', '',
211             'Echinoderm and Flatworm Mitochondrial',#9
212             'Euplotid Nuclear',#10
213             'Bacterial, Archaeal and Plant Plastid',# 11
214             'Alternative Yeast Nuclear',# 12
215             'Ascidian Mitochondrial',# 13
216             'Alternative Flatworm Mitochondrial',# 14
217             'Blepharisma Nuclear',# 15
218             'Chlorophycean Mitochondrial',# 16
219             '', '', '', '',
220             'Trematode Mitochondrial',# 21
221             'Scenedesmus obliquus Mitochondrial', #22
222             'Thraustochytrium Mitochondrial', #23
223             'Pterobranchia Mitochondrial', #24
224             'Candidate Division SR1 and Gracilibacteria', #25
225             );
226              
227 203         764 @TABLES =
228             qw(
229             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
230             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
231             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
232             FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
233             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
234             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
235             FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
236             '' ''
237             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
238             FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
239             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
240             FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
241             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
242             FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
243             FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
244             FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
245             '' '' '' ''
246             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
247             FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
248             FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
249             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG
250             FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
251             );
252              
253             # (bases used for these tables, for reference)
254             # 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
255             # 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
256             # 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
257              
258 203         785 @STARTS =
259             qw(
260             -----------------------------------M----------------------------
261             ---M---------------M---------------M----------------------------
262             --------------------------------MMMM---------------M------------
263             ----------------------------------MM----------------------------
264             --MM---------------M------------MMMM---------------M------------
265             ---M----------------------------MMMM---------------M------------
266             -----------------------------------M----------------------------
267             '' ''
268             -----------------------------------M---------------M------------
269             -----------------------------------M----------------------------
270             ---M---------------M------------MMMM---------------M------------
271             -------------------M---------------M----------------------------
272             ---M------------------------------MM---------------M------------
273             -----------------------------------M----------------------------
274             -----------------------------------M----------------------------
275             -----------------------------------M----------------------------
276             '' '' '' ''
277             -----------------------------------M---------------M------------
278             -----------------------------------M----------------------------
279             --------------------------------M--M---------------M------------
280             ---M---------------M---------------M---------------M------------
281             ---M-------------------------------M---------------M------------
282             );
283              
284 203         382 my @nucs = qw(t c a g);
285 203         274 my $x = 0;
286 203         441 ($CODONS, $TRCOL) = ({}, {});
287 203         436 for my $i (@nucs) {
288 812         697 for my $j (@nucs) {
289 3248         2354 for my $k (@nucs) {
290 12992         9340 my $codon = "$i$j$k";
291 12992         15171 $CODONS->{$codon} = $x;
292 12992         15295 $TRCOL->{$x} = $codon;
293 12992         9666 $x++;
294             }
295             }
296             }
297 203         1213 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
298 203         793 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
299 203         870 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
300 203         1100 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
301 203         349993 $TERMINATOR = '*';
302             }
303              
304             sub new {
305 505     505 1 1569 my($class,@args) = @_;
306 505         1147 my $self = $class->SUPER::new(@args);
307              
308 505         1309 my($id) =
309             $self->_rearrange([qw(ID
310             )],
311             @args);
312              
313 505 100       1057 $id = 1 if ( ! $id );
314 505 50       1161 $id && $self->id($id);
315 505         814 return $self; # success - we hope!
316             }
317              
318             =head2 id
319              
320             Title : id
321             Usage : $obj->id(3); $id_integer = $obj->id();
322             Function: Sets or returns the id of the translation table. IDs are
323             integers from 0 (special ATG-only start) to 25, excluding
324             7-8 and 17-20 which have been removed. If an invalid ID is
325             given the method returns 1, the standard table.
326             Example :
327             Returns : value of id, a scalar, warn and fall back to 1 (standard table)
328             if specified id is not valid
329             Args : newvalue (optional)
330              
331             =cut
332              
333             sub id{
334 25860     25860 1 19358 my ($self,$value) = @_;
335 25860 100       29756 if( defined $value) {
336 644 100 66     2230 if ( not defined $TABLES[$value] or $TABLES[$value] eq '') {
337 1         9 $self->warn("Not a valid codon table ID [$value], using [1] instead ");
338 1         2 $value = 1;
339             }
340 644         764 $self->{'id'} = $value;
341             }
342 25860         22613 return $self->{'id'};
343             }
344              
345             =head2 name
346              
347             Title : name
348             Usage : $obj->name()
349             Function: returns the descriptive name of the translation table
350             Example :
351             Returns : A string
352             Args : None
353              
354              
355             =cut
356              
357             sub name{
358 1     1 1 2 my ($self) = @_;
359              
360 1         1 my ($id) = $self->{'id'};
361 1         4 return $NAMES[$id];
362             }
363              
364             =head2 tables
365              
366             Title : tables
367             Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
368             Function: returns a hash reference where each key is a valid codon
369             table id() number, and each value is the corresponding
370             codon table name() string
371             Example :
372             Returns : A hashref
373             Args : None
374              
375              
376             =cut
377              
378             sub tables{
379 2     2 1 791 my %tables;
380 2         5 for my $id (0 .. $#NAMES) {
381 52         28 my $name = $NAMES[$id];
382 52 100       79 $tables{$id} = $name if $name;
383             }
384 2         4 return \%tables;
385             }
386              
387             =head2 translate
388              
389             Title : translate
390             Usage : $obj->translate('YTR')
391             Function: Returns a string of one letter amino acid codes from
392             nucleotide sequence input. The imput can be of any length.
393              
394             Returns 'X' for unknown codons and codons that code for
395             more than one amino acid. Returns an empty string if input
396             is not three characters long. Exceptions for these are:
397              
398             - IUPAC amino acid code B for Aspartic Acid and
399             Asparagine, is used.
400             - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
401             used.
402             - if the codon is two nucleotides long and if by adding
403             an a third character 'N', it codes for a single amino
404             acid (with exceptions above), return that, otherwise
405             return empty string.
406              
407             Returns empty string for other input strings that are not
408             three characters long.
409              
410             Example :
411             Returns : a string of one letter ambiguous IUPAC amino acid codes
412             Args : ambiguous IUPAC nucleotide string
413              
414              
415             =cut
416              
417             sub translate {
418 19902     19902 1 19535 my ($self, $seq, $complete_codon) = @_;
419 19902 100       22907 $self->throw("Calling translate without a seq argument!") unless defined $seq;
420 19901 100       20940 return '' unless $seq;
421              
422 19900         21213 my $id = $self->id;
423 19900         13972 my ($partial) = 0;
424 19900 100       24766 $partial = 2 if length($seq) % CODONSIZE == 2;
425            
426 19900         15896 $seq = lc $seq;
427 19900         14088 $seq =~ tr/u/t/;
428 19900         12778 my $protein = "";
429 19900 100       34675 if ($seq =~ /[^actg]/ ) { #ambiguous chars
430 5315         8431 for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
431 17779         13041 my $triplet = substr($seq, $i, CODONSIZE);
432 17779 100       22028 if( $triplet eq $CODONGAP ) {
    100          
433 1527         1896 $protein .= $GAP;
434             } elsif (exists $CODONS->{$triplet}) {
435             $protein .= substr($TABLES[$id],
436 10946         15656 $CODONS->{$triplet},1);
437             } else {
438 5306         6328 $protein .= $self->_translate_ambiguous_codon($triplet);
439             }
440             }
441             } else { # simple, strict translation
442 14585         22708 for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
443 124418         83691 my $triplet = substr($seq, $i, CODONSIZE);
444 124418 50       128067 if( $triplet eq $CODONGAP ) {
445 0         0 $protein .= $GAP;
446             }
447 124418 50       111405 if (exists $CODONS->{$triplet}) {
448 124418         185054 $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
449             } else {
450 0         0 $protein .= 'X';
451             }
452             }
453             }
454 19900 100 100     28032 if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides
455 7         13 my $triplet = substr($seq, ($partial -4)). "n";
456 7 50       20 if( $triplet eq $CODONGAP ) {
    50          
457 0         0 $protein .= $GAP;
458             } elsif (exists $CODONS->{$triplet}) {
459 0         0 my $aa = substr($TABLES[$id], $CODONS->{$triplet},1);
460 0         0 $protein .= $aa;
461             } else {
462 7         10 $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
463             }
464             }
465 19900         26973 return $protein;
466             }
467              
468             sub _translate_ambiguous_codon {
469 5313     5313   4487 my ($self, $triplet, $partial) = @_;
470 5313   100     11948 $partial ||= 0;
471 5313         5067 my $id = $self->id;
472 5313         3826 my $aa;
473 5313         5713 my @codons = $self->unambiguous_codons($triplet);
474 5313         4721 my %aas =();
475 5313         4446 foreach my $codon (@codons) {
476 401         478 $aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1;
477             }
478 5313         4378 my $count = scalar keys %aas;
479 5313 100       8187 if ( $count == 1 ) {
    100          
480 141         148 $aa = (keys %aas)[0];
481             }
482             elsif ( $count == 2 ) {
483 13 100 66     36 if ($aas{'D'} and $aas{'N'}) {
    100 66        
484 3         4 $aa = 'B';
485             }
486             elsif ($aas{'E'} and $aas{'Q'}) {
487 4         3 $aa = 'Z';
488             } else {
489 6 100       10 $partial ? ($aa = '') : ($aa = 'X');
490             }
491             } else {
492 5159 100       5576 $partial ? ($aa = '') : ($aa = 'X');
493             }
494 5313         12982 return $aa;
495             }
496              
497             =head2 translate_strict
498              
499             Title : translate_strict
500             Usage : $obj->translate_strict('ACT')
501             Function: returns one letter amino acid code for a codon input
502              
503             Fast and simple translation. User is responsible to resolve
504             ambiguous nucleotide codes before calling this
505             method. Returns 'X' for unknown codons and an empty string
506             for input strings that are not three characters long.
507              
508             It is not recommended to use this method in a production
509             environment. Use method translate, instead.
510              
511             Example :
512             Returns : A string
513             Args : a codon = a three nucleotide character string
514              
515              
516             =cut
517              
518             sub translate_strict{
519 3     3 1 5 my ($self, $value) = @_;
520 3         4 my $id = $self->{'id'};
521              
522 3         4 $value = lc $value;
523 3         3 $value =~ tr/u/t/;
524              
525 3 50       7 return '' unless length $value == 3;
526              
527 3 50       7 return 'X' unless defined $CODONS->{$value};
528              
529 3         10 return substr( $TABLES[$id], $CODONS->{$value}, 1 );
530             }
531              
532             =head2 revtranslate
533              
534             Title : revtranslate
535             Usage : $obj->revtranslate('G')
536             Function: returns codons for an amino acid
537              
538             Returns an empty string for unknown amino acid
539             codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
540             Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
541             letter amino acid codes are accepted. '*' and 'Ter' are
542             used for terminator.
543              
544             By default, the output codons are shown in DNA. If the
545             output is needed in RNA (tr/t/u/), add a second argument
546             'RNA'.
547              
548             Example : $obj->revtranslate('Gly', 'RNA')
549             Returns : An array of three lower case letter strings i.e. codons
550             Args : amino acid, 'RNA'
551              
552             =cut
553              
554             sub revtranslate {
555 135     135 1 842 my ($self, $value, $coding) = @_;
556 135         81 my @codons;
557              
558 135 100       186 if (length($value) == 3 ) {
559 5         4 $value = lc $value;
560 5         7 $value = ucfirst $value;
561 5         8 $value = $THREELETTERSYMBOLS{$value};
562             }
563 135 100 100     766 if ( defined $value and $value =~ /$VALID_PROTEIN/
      66        
564             and length($value) == 1
565             ) {
566 133         131 my $id = $self->{'id'};
567              
568 133         116 $value = uc $value;
569 133         78 my @aas = @{$IUPAC_AA{$value}};
  133         198  
570 133         130 foreach my $aa (@aas) {
571             #print $aa, " -2\n";
572 145 100       177 $aa = '\*' if $aa eq '*';
573 145         912 while ($TABLES[$id] =~ m/$aa/g) {
574 399         280 my $p = pos $TABLES[$id];
575 399         935 push (@codons, $TRCOL->{--$p});
576             }
577             }
578             }
579              
580 135 100 66     204 if ($coding and uc ($coding) eq 'RNA') {
581 1         3 for my $i (0..$#codons) {
582 1         2 $codons[$i] =~ tr/t/u/;
583             }
584             }
585              
586 135         298 return @codons;
587             }
588              
589             =head2 reverse_translate_all
590              
591             Title : reverse_translate_all
592             Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
593             my $iup_str = $cttable->reverse_translate_all($seq_object,
594             $cutable,
595             15);
596             Function: reverse translates a protein sequence into IUPAC nucleotide
597             sequence. An 'X' in the protein sequence is converted to 'NNN'
598             in the nucleotide sequence.
599             Returns : a string
600             Args : a Bio::PrimarySeqI compatible object (mandatory)
601             a Bio::CodonUsage::Table object and a threshold if only
602             codons with a relative frequency above the threshold are
603             to be considered.
604             =cut
605              
606             sub reverse_translate_all {
607 22     22 1 26 my ($self, $obj, $cut, $threshold) = @_;
608              
609             ## check args are OK
610              
611 22 50 33     89 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
612 0         0 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
613             ref($obj) . "]");
614             }
615 22 50       36 if($obj->alphabet ne 'protein') {
616 0         0 $self->throw("Cannot reverse translate, need an amino acid sequence .".
617             "This sequence is of type [" . $obj->alphabet ."]");
618             }
619 22         19 my @data;
620 22         35 my @seq = split '', $obj->seq;
621              
622             ## if we're not supplying a codon usage table...
623 22 100 66     77 if( !$cut && !$threshold) {
624             ## get lists of possible codons for each aa.
625 21         26 for my $aa (@seq) {
626 75 100       108 if ($aa =~ /x/i) {
627 7         11 push @data, (['NNN']);
628             }else {
629 68         91 my @cods = $self->revtranslate($aa);
630 68         106 push @data, \@cods;
631             }
632             }
633             }else{
634             #else we are supplying a codon usage table, we just want common codons
635             #check args first.
636 1 50       4 if(!$cut->isa('Bio::CodonUsage::Table')) {
637 0         0 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
638             ref($cut). "].");
639             }
640 1         3 my $cod_ref = $cut->probable_codons($threshold);
641 1         2 for my $aa (@seq) {
642 21 100       30 if ($aa =~ /x/i) {
643 1         2 push @data, (['NNN']);
644 1         3 next;
645             }
646 20         16 push @data, $cod_ref->{$aa};
647             }
648             }
649              
650 22         38 return $self->_make_iupac_string(\@data);
651             }
652              
653             =head2 reverse_translate_best
654              
655             Title : reverse_translate_best
656             Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
657             Function: Reverse translates a protein sequence into plain nucleotide
658             sequence (GATC), uses the most common codon for each amino acid
659             Returns : A string
660             Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
661              
662             =cut
663              
664             sub reverse_translate_best {
665              
666 1     1 1 2 my ($self, $obj, $cut) = @_;
667              
668 1 50 33     6 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
669 0         0 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
670             ref($obj) . "]");
671             }
672 1 50       3 if ($obj->alphabet ne 'protein') {
673 0         0 $self->throw("Cannot reverse translate, need an amino acid sequence .".
674             "This sequence is of type [" . $obj->alphabet ."]");
675             }
676 1 50       7 if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) {
677 0         0 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
678             ref($cut). "].");
679             }
680              
681 1         1 my $str = '';
682 1         2 my @seq = split '', $obj->seq;
683              
684 1         3 my $cod_ref = $cut->most_common_codons();
685              
686 1         2 for my $aa ( @seq ) {
687 21 100       27 if ($aa =~ /x/i) {
688 1         2 $str .= 'NNN';
689 1         1 next;
690             }
691 20 50       19 if ( defined $cod_ref->{$aa} ) {
692 20         17 $str .= $cod_ref->{$aa};
693             } else {
694 0         0 $self->throw("Input sequence contains invalid character: $aa");
695             }
696             }
697 1         6 return $str;
698             }
699              
700             =head2 is_start_codon
701              
702             Title : is_start_codon
703             Usage : $obj->is_start_codon('ATG')
704             Function: returns true (1) for all codons that can be used as a
705             translation start, false (0) for others.
706             Example : $myCodonTable->is_start_codon('ATG')
707             Returns : boolean
708             Args : codon
709              
710             =cut
711              
712             sub is_start_codon{
713 718     718 1 762 shift->_codon_is( shift, \@STARTS, 'M' );
714             }
715              
716             =head2 is_ter_codon
717              
718             Title : is_ter_codon
719             Usage : $obj->is_ter_codon('GAA')
720             Function: returns true (1) for all codons that can be used as a
721             translation tarminator, false (0) for others.
722             Example : $myCodonTable->is_ter_codon('ATG')
723             Returns : boolean
724             Args : codon
725              
726             =cut
727              
728             sub is_ter_codon{
729 613     613 1 471 my ($self, $value) = @_;
730 613         433 my $id = $self->{'id'};
731              
732             # We need to ensure U is mapped to T (ie. UAG)
733 613         437 $value = uc $value;
734 613         410 $value =~ tr/U/T/;
735              
736 613 100       619 if (length $value != 3 ) {
737             # Incomplete codons are not stop codons
738 1         3 return 0;
739             } else {
740 612         386 my $result = 0;
741              
742             # For all the possible codons, if any are not a stop
743             # codon, fail immediately
744 612         592 for my $c ( $self->unambiguous_codons($value) ) {
745 615         660 my $m = substr( $TABLES[$id], $CODONS->{$c}, 1 );
746 615 100       613 if($m eq $TERMINATOR) {
747 46         50 $result = 1;
748             } else {
749 569         2514 return 0;
750             }
751             }
752 43         97 return $result;
753             }
754             }
755              
756             # desc: compares the passed value with a single entry in the given
757             # codon table
758             # args: a value (typically a three-char string like 'atg'),
759             # a reference to the appropriate set of codon tables,
760             # a single-character value to check for at the position in the
761             # given codon table
762             # ret: boolean, true if the given codon table contains the $key at the
763             # position corresponding to $value
764             sub _codon_is {
765 718     718   562 my ($self, $value, $table, $key ) = @_;
766              
767 718 50       866 return 0 unless length $value == 3;
768              
769 718         489 $value = lc $value;
770 718         471 $value =~ tr/u/t/;
771              
772 718         522 my $id = $self->{'id'};
773 718         651 for my $c ( $self->unambiguous_codons($value) ) {
774 720         727 my $m = substr( $table->[$id], $CODONS->{$c}, 1 );
775 720 100       1070 if ($m eq $key) { return 1; }
  55         126  
776             }
777 663         1640 return 0;
778             }
779              
780             =head2 is_unknown_codon
781              
782             Title : is_unknown_codon
783             Usage : $obj->is_unknown_codon('GAJ')
784             Function: returns false (0) for all codons that are valid,
785             true (1) for others.
786             Example : $myCodonTable->is_unknown_codon('NTG')
787             Returns : boolean
788             Args : codon
789              
790              
791             =cut
792              
793             sub is_unknown_codon{
794 3     3 1 3 my ($self, $value) = @_;
795 3         5 $value = lc $value;
796 3         4 $value =~ tr/u/t/;
797 3 100       4 return 1 unless $self->unambiguous_codons($value);
798 1         3 return 0;
799             }
800              
801             =head2 unambiguous_codons
802              
803             Title : unambiguous_codons
804             Usage : @codons = $self->unambiguous_codons('ACN')
805             Returns : array of strings (one-letter unambiguous amino acid codes)
806             Args : a codon = a three IUPAC nucleotide character string
807              
808             =cut
809              
810             sub unambiguous_codons{
811 6646     6646 1 4675 my ($self,$value) = @_;
812 6646         10107 my @nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
  19937         22101  
813              
814 6646         5462 my @codons;
815 6646         4095 for my $i ( @{$nts[0]} ) {
  6646         9111  
816 1658         969 for my $j ( @{$nts[1]} ) {
  1658         1407  
817 1633         907 for my $k ( @{$nts[2]} ) {
  1633         1413  
818 1804         2680 push @codons, lc "$i$j$k";
819             }}}
820 6646         8162 return @codons;
821             }
822              
823             =head2 _unambiquous_codons
824              
825             deprecated, now an alias for unambiguous_codons
826              
827             =cut
828              
829             sub _unambiquous_codons {
830 0     0   0 unambiguous_codons( undef, @_ );
831             }
832              
833             =head2 add_table
834              
835             Title : add_table
836             Usage : $newid = $ct->add_table($name, $table, $starts)
837             Function: Add a custom Codon Table into the object.
838             Know what you are doing, only the length of
839             the argument strings is checked!
840             Returns : the id of the new codon table
841             Args : name, a string, optional (can be empty)
842             table, a string of 64 characters
843             startcodons, a string of 64 characters, defaults to standard
844              
845             =cut
846              
847             sub add_table {
848 1     1 1 2 my ($self, $name, $table, $starts) = @_;
849              
850 1   33     2 $name ||= 'Custom' . $#NAMES + 1;
851 1   33     4 $starts ||= $STARTS[1];
852 1 50 33     5 $self->throw('Suspect input!')
853             unless length($table) == 64 and length($starts) == 64;
854              
855 1         2 push @NAMES, $name;
856 1         2 push @TABLES, $table;
857 1         1 push @STARTS, $starts;
858              
859 1         3 return $#NAMES;
860             }
861              
862             sub _make_iupac_string {
863 22     22   18 my ($self, $cod_ref) = @_;
864 22 50       47 if(ref($cod_ref) ne 'ARRAY') {
865 0         0 $self->throw(" I need a reference to a list of references to codons, ".
866             " not a [". ref($cod_ref) . "].");
867             }
868 22         52 my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub();
869 22         38 my $iupac_string = ''; ## the string to be returned
870 22         23 for my $aa (@$cod_ref) {
871              
872             ## scan through codon positions, record the differing values,
873             # then look up in the iub hash
874 96         88 for my $index(0..2) {
875 288         150 my %h;
876 288         206 map { my $k = substr($_,$index,1);
  846         544  
877 846         682 $h{$k} = undef;} @$aa;
878 288         337 my $lookup_key = join '', sort{$a cmp $b}keys %h;
  225         224  
879              
880             ## extend string
881 288         378 $iupac_string .= $iupac_hash{uc$lookup_key};
882             }
883             }
884 22         149 return $iupac_string;
885             }
886              
887              
888             1;