File Coverage

Bio/Tools/CodonTable.pm
Criterion Covered Total %
statement 225 239 94.1
branch 75 92 81.5
condition 25 41 60.9
subroutine 24 25 96.0
pod 14 14 100.0
total 363 411 88.3


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::Tools::CodonTable
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::CodonTable - Codon table object
17              
18             =head1 SYNOPSIS
19              
20             # This is a read-only class for all known codon tables. The IDs are
21             # the ones used by nucleotide sequence databases. All common IUPAC
22             # ambiguity codes for DNA, RNA and amino acids are recognized.
23              
24             use Bio::Tools::CodonTable;
25              
26             # defaults to ID 1 "Standard"
27             $myCodonTable = Bio::Tools::CodonTable->new();
28             $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
29              
30             # change codon table
31             $myCodonTable->id(5);
32              
33             # examine codon table
34             print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
35             "is:", $myCodonTable->name(), "\n");
36              
37             # print possible codon tables
38             $tables = Bio::Tools::CodonTable->tables;
39             while ( ($id,$name) = each %{$tables} ) {
40             print "$id = $name\n";
41             }
42              
43             # translate a codon
44             $aa = $myCodonTable->translate('ACU');
45             $aa = $myCodonTable->translate('act');
46             $aa = $myCodonTable->translate('ytr');
47              
48             # reverse translate an amino acid
49             @codons = $myCodonTable->revtranslate('A');
50             @codons = $myCodonTable->revtranslate('Ser');
51             @codons = $myCodonTable->revtranslate('Glx');
52             @codons = $myCodonTable->revtranslate('cYS', 'rna');
53              
54             # reverse translate an entire amino acid sequence into a IUPAC
55             # nucleotide string
56              
57             my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
58             my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
59              
60             # boolean tests
61             print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
62             print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
63             print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
64              
65             =head1 DESCRIPTION
66              
67             Codon tables are also called translation tables or genetic codes
68             since that is what they represent. A bit more complete picture
69             of the full complexity of codon usage in various taxonomic groups
70             is presented at the NCBI Genetic Codes Home page.
71              
72             CodonTable is a BioPerl class that knows all current translation
73             tables that are used by primary nucleotide sequence databases
74             (GenBank, EMBL and DDBJ). It provides methods to output information
75             about tables and relationships between codons and amino acids.
76              
77             This class and its methods recognized all common IUPAC ambiguity codes
78             for DNA, RNA and animo acids. The translation method follows the
79             conventions in EMBL and TREMBL databases.
80              
81             It is a nuisance to separate RNA and cDNA representations of nucleic
82             acid transcripts. The CodonTable object accepts codons of both type as
83             input and allows the user to set the mode for output when reverse
84             translating. Its default for output is DNA.
85              
86             Note:
87              
88             This class deals primarily with individual codons and amino
89             acids. However in the interest of speed you can L
90             longer sequence, too. The full complexity of protein translation
91             is tackled by L.
92              
93              
94             The amino acid codes are IUPAC recommendations for common amino acids:
95              
96             A Ala Alanine
97             R Arg Arginine
98             N Asn Asparagine
99             D Asp Aspartic acid
100             C Cys Cysteine
101             Q Gln Glutamine
102             E Glu Glutamic acid
103             G Gly Glycine
104             H His Histidine
105             I Ile Isoleucine
106             L Leu Leucine
107             K Lys Lysine
108             M Met Methionine
109             F Phe Phenylalanine
110             P Pro Proline
111             O Pyl Pyrrolysine (22nd amino acid)
112             U Sec Selenocysteine (21st amino acid)
113             S Ser Serine
114             T Thr Threonine
115             W Trp Tryptophan
116             Y Tyr Tyrosine
117             V Val Valine
118             B Asx Aspartic acid or Asparagine
119             Z Glx Glutamine or Glutamic acid
120             J Xle Isoleucine or Valine (mass spec ambiguity)
121             X Xaa Any or unknown amino acid
122              
123              
124             It is worth noting that, "Bacterial" codon table no. 11 produces an
125             polypeptide that is, confusingly, identical to the standard one. The
126             only differences are in available initiator codons.
127              
128              
129             NCBI Genetic Codes home page:
130             (Last update of the Genetic Codes: April 30, 2013)
131             https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
132              
133             ASN.1 version with ids 1 to 25 is at:
134             ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
135              
136             Thanks to Matteo diTomasso for the original Perl implementation
137             of these tables.
138              
139             =head1 FEEDBACK
140              
141             =head2 Mailing Lists
142              
143             User feedback is an integral part of the evolution of this and other
144             Bioperl modules. Send your comments and suggestions preferably to the
145             Bioperl mailing lists Your participation is much appreciated.
146              
147             bioperl-l@bioperl.org - General discussion
148             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
149              
150             =head2 Support
151              
152             Please direct usage questions or support issues to the mailing list:
153              
154             I
155              
156             rather than to the module maintainer directly. Many experienced and
157             reponsive experts will be able look at the problem and quickly
158             address it. Please include a thorough description of the problem
159             with code and data examples if at all possible.
160              
161             =head2 Reporting Bugs
162              
163             Report bugs to the Bioperl bug tracking system to help us keep track
164             the bugs and their resolution. Bug reports can be submitted via the
165             web:
166              
167             https://github.com/bioperl/bioperl-live/issues
168              
169             =head1 AUTHOR - Heikki Lehvaslaiho
170              
171             Email: heikki-at-bioperl-dot-org
172              
173             =head1 APPENDIX
174              
175             The rest of the documentation details each of the object
176             methods. Internal methods are usually preceded with a _
177              
178             =cut
179              
180             # Let the code begin...
181              
182             package Bio::Tools::CodonTable;
183 203         18989 use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
184 203     203   2213 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
  203         349  
185 203     203   1116 use strict;
  203         318  
  203         4289  
186              
187             # Object preamble - inherits from Bio::Root::Root
188 203     203   55182 use Bio::Tools::IUPAC;
  203         611  
  203         5240  
189 203     203   81909 use Bio::SeqUtils;
  203         538  
  203         6928  
190              
191 203     203   1493 use base qw(Bio::Root::Root);
  203         350  
  203         15445  
192              
193              
194             # first set internal values for all translation tables
195              
196             BEGIN {
197 203     203   1196 use constant CODONSIZE => 3;
  203         375  
  203         61599  
198 203     203   828 $GAP = '-';
199 203         687 $CODONGAP = $GAP x CODONSIZE;
200              
201 203         1162 @NAMES = #id
202             (
203             'Strict', #0, special option for ATG-only start
204             'Standard', #1
205             'Vertebrate Mitochondrial',#2
206             'Yeast Mitochondrial',# 3
207             'Mold, Protozoan, and Coelenterate Mitochondrial and Mycoplasma/Spiroplasma',#4
208             'Invertebrate Mitochondrial',#5
209             'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
210             '', '',
211             'Echinoderm and Flatworm Mitochondrial',#9
212             'Euplotid Nuclear',#10
213             'Bacterial, Archaeal and Plant Plastid',# 11
214             'Alternative Yeast Nuclear',# 12
215             'Ascidian Mitochondrial',# 13
216             'Alternative Flatworm Mitochondrial',# 14
217             'Blepharisma Nuclear',# 15
218             'Chlorophycean Mitochondrial',# 16
219             '', '', '', '',
220             'Trematode Mitochondrial',# 21
221             'Scenedesmus obliquus Mitochondrial', #22
222             'Thraustochytrium Mitochondrial', #23
223             'Pterobranchia Mitochondrial', #24
224             'Candidate Division SR1 and Gracilibacteria', #25
225             );
226              
227 203         911 @TABLES =
228             qw(
229             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
230             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
231             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
232             FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
233             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
234             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
235             FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
236             '' ''
237             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
238             FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
239             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
240             FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
241             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
242             FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
243             FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
244             FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
245             '' '' '' ''
246             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
247             FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
248             FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
249             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG
250             FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
251             );
252              
253             # (bases used for these tables, for reference)
254             # 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
255             # 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
256             # 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
257              
258 203         880 @STARTS =
259             qw(
260             -----------------------------------M----------------------------
261             ---M---------------M---------------M----------------------------
262             --------------------------------MMMM---------------M------------
263             ----------------------------------MM----------------------------
264             --MM---------------M------------MMMM---------------M------------
265             ---M----------------------------MMMM---------------M------------
266             -----------------------------------M----------------------------
267             '' ''
268             -----------------------------------M---------------M------------
269             -----------------------------------M----------------------------
270             ---M---------------M------------MMMM---------------M------------
271             -------------------M---------------M----------------------------
272             ---M------------------------------MM---------------M------------
273             -----------------------------------M----------------------------
274             -----------------------------------M----------------------------
275             -----------------------------------M----------------------------
276             '' '' '' ''
277             -----------------------------------M---------------M------------
278             -----------------------------------M----------------------------
279             --------------------------------M--M---------------M------------
280             ---M---------------M---------------M---------------M------------
281             ---M-------------------------------M---------------M------------
282             );
283              
284 203         483 my @nucs = qw(t c a g);
285 203         362 my $x = 0;
286 203         448 ($CODONS, $TRCOL) = ({}, {});
287 203         573 for my $i (@nucs) {
288 812         1013 for my $j (@nucs) {
289 3248         3252 for my $k (@nucs) {
290 12992         13127 my $codon = "$i$j$k";
291 12992         20760 $CODONS->{$codon} = $x;
292 12992         20312 $TRCOL->{$x} = $codon;
293 12992         13467 $x++;
294             }
295             }
296             }
297 203         1355 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
298 203         989 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
299 203         1041 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
300 203         1061 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
301 203         386791 $TERMINATOR = '*';
302             }
303              
304             sub new {
305 505     505 1 1789 my($class,@args) = @_;
306 505         1422 my $self = $class->SUPER::new(@args);
307              
308 505         1773 my($id) =
309             $self->_rearrange([qw(ID
310             )],
311             @args);
312              
313 505 100       1401 $id = 1 if ( ! $id );
314 505 50       1743 $id && $self->id($id);
315 505         1017 return $self; # success - we hope!
316             }
317              
318             =head2 id
319              
320             Title : id
321             Usage : $obj->id(3); $id_integer = $obj->id();
322             Function: Sets or returns the id of the translation table. IDs are
323             integers from 0 (special ATG-only start) to 25, excluding
324             7-8 and 17-20 which have been removed. If an invalid ID is
325             given the method returns 1, the standard table.
326             Example :
327             Returns : value of id, a scalar, warn and fall back to 1 (standard table)
328             if specified id is not valid
329             Args : newvalue (optional)
330              
331             =cut
332              
333             sub id{
334 25860     25860 1 30290 my ($self,$value) = @_;
335 25860 100       32497 if( defined $value) {
336 644 100 66     2327 if ( not defined $TABLES[$value] or $TABLES[$value] eq '') {
337 1         12 $self->warn("Not a valid codon table ID [$value], using [1] instead ");
338 1         2 $value = 1;
339             }
340 644         1030 $self->{'id'} = $value;
341             }
342 25860         32088 return $self->{'id'};
343             }
344              
345             =head2 name
346              
347             Title : name
348             Usage : $obj->name()
349             Function: returns the descriptive name of the translation table
350             Example :
351             Returns : A string
352             Args : None
353              
354              
355             =cut
356              
357             sub name{
358 1     1 1 3 my ($self) = @_;
359              
360 1         3 my ($id) = $self->{'id'};
361 1         5 return $NAMES[$id];
362             }
363              
364             =head2 tables
365              
366             Title : tables
367             Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
368             Function: returns a hash reference where each key is a valid codon
369             table id() number, and each value is the corresponding
370             codon table name() string
371             Example :
372             Returns : A hashref
373             Args : None
374              
375              
376             =cut
377              
378             sub tables{
379 2     2 1 801 my %tables;
380 2         6 for my $id (0 .. $#NAMES) {
381 52         47 my $name = $NAMES[$id];
382 52 100       88 $tables{$id} = $name if $name;
383             }
384 2         5 return \%tables;
385             }
386              
387             =head2 translate
388              
389             Title : translate
390             Usage : $obj->translate('YTR')
391             Function: Returns a string of one letter amino acid codes from
392             nucleotide sequence input. The imput can be of any length.
393              
394             Returns 'X' for unknown codons and codons that code for
395             more than one amino acid. Returns an empty string if input
396             is not three characters long. Exceptions for these are:
397              
398             - IUPAC amino acid code B for Aspartic Acid and
399             Asparagine, is used.
400             - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
401             used.
402             - if the codon is two nucleotides long and if by adding
403             an a third character 'N', it codes for a single amino
404             acid (with exceptions above), return that, otherwise
405             return empty string.
406              
407             Returns empty string for other input strings that are not
408             three characters long.
409              
410             Example :
411             Returns : a string of one letter ambiguous IUPAC amino acid codes
412             Args : ambiguous IUPAC nucleotide string
413              
414              
415             =cut
416              
417             sub translate {
418 19902     19902 1 33549 my ($self, $seq, $complete_codon) = @_;
419 19902 100       27346 $self->throw("Calling translate without a seq argument!") unless defined $seq;
420 19901 100       24845 return '' unless $seq;
421              
422 19900         24263 my $id = $self->id;
423 19900         21740 my ($partial) = 0;
424 19900 100       29891 $partial = 2 if length($seq) % CODONSIZE == 2;
425            
426 19900         23630 $seq = lc $seq;
427 19900         22038 $seq =~ tr/u/t/;
428 19900         20072 my $protein = "";
429 19900 100       40211 if ($seq =~ /[^actg]/ ) { #ambiguous chars
430 5315         10086 for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
431 17779         19080 my $triplet = substr($seq, $i, CODONSIZE);
432 17779 100       25226 if( $triplet eq $CODONGAP ) {
    100          
433 1527         2158 $protein .= $GAP;
434             } elsif (exists $CODONS->{$triplet}) {
435             $protein .= substr($TABLES[$id],
436 10946         17879 $CODONS->{$triplet},1);
437             } else {
438 5306         7625 $protein .= $self->_translate_ambiguous_codon($triplet);
439             }
440             }
441             } else { # simple, strict translation
442 14585         26710 for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
443 124418         126129 my $triplet = substr($seq, $i, CODONSIZE);
444 124418 50       143298 if( $triplet eq $CODONGAP ) {
445 0         0 $protein .= $GAP;
446             }
447 124418 50       139473 if (exists $CODONS->{$triplet}) {
448 124418         203514 $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
449             } else {
450 0         0 $protein .= 'X';
451             }
452             }
453             }
454 19900 100 100     32152 if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides
455 7         21 my $triplet = substr($seq, ($partial -4)). "n";
456 7 50       23 if( $triplet eq $CODONGAP ) {
    50          
457 0         0 $protein .= $GAP;
458             } elsif (exists $CODONS->{$triplet}) {
459 0         0 my $aa = substr($TABLES[$id], $CODONS->{$triplet},1);
460 0         0 $protein .= $aa;
461             } else {
462 7         18 $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
463             }
464             }
465 19900         37457 return $protein;
466             }
467              
468             sub _translate_ambiguous_codon {
469 5313     5313   6707 my ($self, $triplet, $partial) = @_;
470 5313   100     12930 $partial ||= 0;
471 5313         5811 my $id = $self->id;
472 5313         4760 my $aa;
473 5313         7235 my @codons = $self->unambiguous_codons($triplet);
474 5313         5091 my %aas =();
475 5313         5709 foreach my $codon (@codons) {
476 401         662 $aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1;
477             }
478 5313         6141 my $count = scalar keys %aas;
479 5313 100       7359 if ( $count == 1 ) {
    100          
480 141         216 $aa = (keys %aas)[0];
481             }
482             elsif ( $count == 2 ) {
483 13 100 66     55 if ($aas{'D'} and $aas{'N'}) {
    100 66        
484 3         5 $aa = 'B';
485             }
486             elsif ($aas{'E'} and $aas{'Q'}) {
487 4         6 $aa = 'Z';
488             } else {
489 6 100       13 $partial ? ($aa = '') : ($aa = 'X');
490             }
491             } else {
492 5159 100       6512 $partial ? ($aa = '') : ($aa = 'X');
493             }
494 5313         13796 return $aa;
495             }
496              
497             =head2 translate_strict
498              
499             Title : translate_strict
500             Usage : $obj->translate_strict('ACT')
501             Function: returns one letter amino acid code for a codon input
502              
503             Fast and simple translation. User is responsible to resolve
504             ambiguous nucleotide codes before calling this
505             method. Returns 'X' for unknown codons and an empty string
506             for input strings that are not three characters long.
507              
508             It is not recommended to use this method in a production
509             environment. Use method translate, instead.
510              
511             Example :
512             Returns : A string
513             Args : a codon = a three nucleotide character string
514              
515              
516             =cut
517              
518             sub translate_strict{
519 3     3 1 9 my ($self, $value) = @_;
520 3         6 my $id = $self->{'id'};
521              
522 3         8 $value = lc $value;
523 3         4 $value =~ tr/u/t/;
524              
525 3 50       11 return '' unless length $value == 3;
526              
527 3 50       10 return 'X' unless defined $CODONS->{$value};
528              
529 3         13 return substr( $TABLES[$id], $CODONS->{$value}, 1 );
530             }
531              
532             =head2 revtranslate
533              
534             Title : revtranslate
535             Usage : $obj->revtranslate('G')
536             Function: returns codons for an amino acid
537              
538             Returns an empty string for unknown amino acid
539             codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
540             Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
541             letter amino acid codes are accepted. '*' and 'Ter' are
542             used for terminator.
543              
544             By default, the output codons are shown in DNA. If the
545             output is needed in RNA (tr/t/u/), add a second argument
546             'RNA'.
547              
548             Example : $obj->revtranslate('Gly', 'RNA')
549             Returns : An array of three lower case letter strings i.e. codons
550             Args : amino acid, 'RNA'
551              
552             =cut
553              
554             sub revtranslate {
555 135     135 1 1131 my ($self, $value, $coding) = @_;
556 135         155 my @codons;
557              
558 135 100       246 if (length($value) == 3 ) {
559 5         7 $value = lc $value;
560 5         7 $value = ucfirst $value;
561 5         9 $value = $THREELETTERSYMBOLS{$value};
562             }
563 135 100 100     829 if ( defined $value and $value =~ /$VALID_PROTEIN/
      66        
564             and length($value) == 1
565             ) {
566 133         228 my $id = $self->{'id'};
567              
568 133         190 $value = uc $value;
569 133         151 my @aas = @{$IUPAC_AA{$value}};
  133         335  
570 133         201 foreach my $aa (@aas) {
571             #print $aa, " -2\n";
572 145 100       302 $aa = '\*' if $aa eq '*';
573 145         1329 while ($TABLES[$id] =~ m/$aa/g) {
574 399         656 my $p = pos $TABLES[$id];
575 399         1348 push (@codons, $TRCOL->{--$p});
576             }
577             }
578             }
579              
580 135 100 66     269 if ($coding and uc ($coding) eq 'RNA') {
581 1         5 for my $i (0..$#codons) {
582 1         2 $codons[$i] =~ tr/t/u/;
583             }
584             }
585              
586 135         406 return @codons;
587             }
588              
589             =head2 reverse_translate_all
590              
591             Title : reverse_translate_all
592             Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
593             my $iup_str = $cttable->reverse_translate_all($seq_object,
594             $cutable,
595             15);
596             Function: reverse translates a protein sequence into IUPAC nucleotide
597             sequence. An 'X' in the protein sequence is converted to 'NNN'
598             in the nucleotide sequence.
599             Returns : a string
600             Args : a Bio::PrimarySeqI compatible object (mandatory)
601             a Bio::CodonUsage::Table object and a threshold if only
602             codons with a relative frequency above the threshold are
603             to be considered.
604             =cut
605              
606             sub reverse_translate_all {
607 22     22 1 54 my ($self, $obj, $cut, $threshold) = @_;
608              
609             ## check args are OK
610              
611 22 50 33     147 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
612 0         0 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
613             ref($obj) . "]");
614             }
615 22 50       69 if($obj->alphabet ne 'protein') {
616 0         0 $self->throw("Cannot reverse translate, need an amino acid sequence .".
617             "This sequence is of type [" . $obj->alphabet ."]");
618             }
619 22         40 my @data;
620 22         57 my @seq = split '', $obj->seq;
621              
622             ## if we're not supplying a codon usage table...
623 22 100 66     91 if( !$cut && !$threshold) {
624             ## get lists of possible codons for each aa.
625 21         41 for my $aa (@seq) {
626 75 100       173 if ($aa =~ /x/i) {
627 7         21 push @data, (['NNN']);
628             }else {
629 68         130 my @cods = $self->revtranslate($aa);
630 68         158 push @data, \@cods;
631             }
632             }
633             }else{
634             #else we are supplying a codon usage table, we just want common codons
635             #check args first.
636 1 50       5 if(!$cut->isa('Bio::CodonUsage::Table')) {
637 0         0 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
638             ref($cut). "].");
639             }
640 1         5 my $cod_ref = $cut->probable_codons($threshold);
641 1         2 for my $aa (@seq) {
642 21 100       37 if ($aa =~ /x/i) {
643 1         3 push @data, (['NNN']);
644 1         5 next;
645             }
646 20         25 push @data, $cod_ref->{$aa};
647             }
648             }
649              
650 22         69 return $self->_make_iupac_string(\@data);
651             }
652              
653             =head2 reverse_translate_best
654              
655             Title : reverse_translate_best
656             Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
657             Function: Reverse translates a protein sequence into plain nucleotide
658             sequence (GATC), uses the most common codon for each amino acid
659             Returns : A string
660             Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
661              
662             =cut
663              
664             sub reverse_translate_best {
665              
666 1     1 1 4 my ($self, $obj, $cut) = @_;
667              
668 1 50 33     8 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
669 0         0 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
670             ref($obj) . "]");
671             }
672 1 50       4 if ($obj->alphabet ne 'protein') {
673 0         0 $self->throw("Cannot reverse translate, need an amino acid sequence .".
674             "This sequence is of type [" . $obj->alphabet ."]");
675             }
676 1 50       10 if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) {
677 0         0 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
678             ref($cut). "].");
679             }
680              
681 1         2 my $str = '';
682 1         4 my @seq = split '', $obj->seq;
683              
684 1         6 my $cod_ref = $cut->most_common_codons();
685              
686 1         2 for my $aa ( @seq ) {
687 21 100       29 if ($aa =~ /x/i) {
688 1         2 $str .= 'NNN';
689 1         3 next;
690             }
691 20 50       23 if ( defined $cod_ref->{$aa} ) {
692 20         24 $str .= $cod_ref->{$aa};
693             } else {
694 0         0 $self->throw("Input sequence contains invalid character: $aa");
695             }
696             }
697 1         7 return $str;
698             }
699              
700             =head2 is_start_codon
701              
702             Title : is_start_codon
703             Usage : $obj->is_start_codon('ATG')
704             Function: returns true (1) for all codons that can be used as a
705             translation start, false (0) for others.
706             Example : $myCodonTable->is_start_codon('ATG')
707             Returns : boolean
708             Args : codon
709              
710             =cut
711              
712             sub is_start_codon{
713 718     718 1 1029 shift->_codon_is( shift, \@STARTS, 'M' );
714             }
715              
716             =head2 is_ter_codon
717              
718             Title : is_ter_codon
719             Usage : $obj->is_ter_codon('GAA')
720             Function: returns true (1) for all codons that can be used as a
721             translation tarminator, false (0) for others.
722             Example : $myCodonTable->is_ter_codon('ATG')
723             Returns : boolean
724             Args : codon
725              
726             =cut
727              
728             sub is_ter_codon{
729 613     613 1 785 my ($self, $value) = @_;
730 613         697 my $id = $self->{'id'};
731              
732             # We need to ensure U is mapped to T (ie. UAG)
733 613         647 $value = uc $value;
734 613         651 $value =~ tr/U/T/;
735              
736 613 100       824 if (length $value != 3 ) {
737             # Incomplete codons are not stop codons
738 1         5 return 0;
739             } else {
740 612         578 my $result = 0;
741              
742             # For all the possible codons, if any are not a stop
743             # codon, fail immediately
744 612         821 for my $c ( $self->unambiguous_codons($value) ) {
745 615         999 my $m = substr( $TABLES[$id], $CODONS->{$c}, 1 );
746 615 100       808 if($m eq $TERMINATOR) {
747 46         58 $result = 1;
748             } else {
749 569         2363 return 0;
750             }
751             }
752 43         149 return $result;
753             }
754             }
755              
756             # desc: compares the passed value with a single entry in the given
757             # codon table
758             # args: a value (typically a three-char string like 'atg'),
759             # a reference to the appropriate set of codon tables,
760             # a single-character value to check for at the position in the
761             # given codon table
762             # ret: boolean, true if the given codon table contains the $key at the
763             # position corresponding to $value
764             sub _codon_is {
765 718     718   981 my ($self, $value, $table, $key ) = @_;
766              
767 718 50       995 return 0 unless length $value == 3;
768              
769 718         755 $value = lc $value;
770 718         750 $value =~ tr/u/t/;
771              
772 718         851 my $id = $self->{'id'};
773 718         963 for my $c ( $self->unambiguous_codons($value) ) {
774 720         1125 my $m = substr( $table->[$id], $CODONS->{$c}, 1 );
775 720 100       1175 if ($m eq $key) { return 1; }
  55         145  
776             }
777 663         1733 return 0;
778             }
779              
780             =head2 is_unknown_codon
781              
782             Title : is_unknown_codon
783             Usage : $obj->is_unknown_codon('GAJ')
784             Function: returns false (0) for all codons that are valid,
785             true (1) for others.
786             Example : $myCodonTable->is_unknown_codon('NTG')
787             Returns : boolean
788             Args : codon
789              
790              
791             =cut
792              
793             sub is_unknown_codon{
794 3     3 1 6 my ($self, $value) = @_;
795 3         6 $value = lc $value;
796 3         5 $value =~ tr/u/t/;
797 3 100       6 return 1 unless $self->unambiguous_codons($value);
798 1         4 return 0;
799             }
800              
801             =head2 unambiguous_codons
802              
803             Title : unambiguous_codons
804             Usage : @codons = $self->unambiguous_codons('ACN')
805             Returns : array of strings (one-letter unambiguous amino acid codes)
806             Args : a codon = a three IUPAC nucleotide character string
807              
808             =cut
809              
810             sub unambiguous_codons{
811 6646     6646 1 7460 my ($self,$value) = @_;
812 6646         13263 my @nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
  19937         28577  
813              
814 6646         8094 my @codons;
815 6646         5817 for my $i ( @{$nts[0]} ) {
  6646         10898  
816 1658         1610 for my $j ( @{$nts[1]} ) {
  1658         1934  
817 1633         1453 for my $k ( @{$nts[2]} ) {
  1633         1869  
818 1804         3693 push @codons, lc "$i$j$k";
819             }}}
820 6646         9708 return @codons;
821             }
822              
823             =head2 _unambiquous_codons
824              
825             deprecated, now an alias for unambiguous_codons
826              
827             =cut
828              
829             sub _unambiquous_codons {
830 0     0   0 unambiguous_codons( undef, @_ );
831             }
832              
833             =head2 add_table
834              
835             Title : add_table
836             Usage : $newid = $ct->add_table($name, $table, $starts)
837             Function: Add a custom Codon Table into the object.
838             Know what you are doing, only the length of
839             the argument strings is checked!
840             Returns : the id of the new codon table
841             Args : name, a string, optional (can be empty)
842             table, a string of 64 characters
843             startcodons, a string of 64 characters, defaults to standard
844              
845             =cut
846              
847             sub add_table {
848 1     1 1 4 my ($self, $name, $table, $starts) = @_;
849              
850 1   33     4 $name ||= 'Custom' . $#NAMES + 1;
851 1   33     31 $starts ||= $STARTS[1];
852 1 50 33     5 $self->throw('Suspect input!')
853             unless length($table) == 64 and length($starts) == 64;
854              
855 1         4 push @NAMES, $name;
856 1         3 push @TABLES, $table;
857 1         3 push @STARTS, $starts;
858              
859 1         5 return $#NAMES;
860             }
861              
862             sub _make_iupac_string {
863 22     22   45 my ($self, $cod_ref) = @_;
864 22 50       60 if(ref($cod_ref) ne 'ARRAY') {
865 0         0 $self->throw(" I need a reference to a list of references to codons, ".
866             " not a [". ref($cod_ref) . "].");
867             }
868 22         100 my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub();
869 22         59 my $iupac_string = ''; ## the string to be returned
870 22         45 for my $aa (@$cod_ref) {
871              
872             ## scan through codon positions, record the differing values,
873             # then look up in the iub hash
874 96         139 for my $index(0..2) {
875 288         675 my %h;
876 288         337 map { my $k = substr($_,$index,1);
  846         936  
877 846         1089 $h{$k} = undef;} @$aa;
878 288         558 my $lookup_key = join '', sort{$a cmp $b}keys %h;
  226         320  
879              
880             ## extend string
881 288         541 $iupac_string .= $iupac_hash{uc$lookup_key};
882             }
883             }
884 22         180 return $iupac_string;
885             }
886              
887              
888             1;