|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # BioPerl module for Bio::Align::DNAStatistics  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Please direct questions and support issues to    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Cared for by Jason Stajich   | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Copyright Jason Stajich  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # You may distribute this module under the same terms as perl itself  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # POD documentation - main docs before the code  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Align::DNAStatistics - Calculate some statistics for a DNA alignment  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::AlignIO;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::Align::DNAStatistics;  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $stats = Bio::Align::DNAStatistics->new();  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $alignin = Bio::AlignIO->new(-format => 'emboss',  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                  -file   => 't/data/insulin.water');  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $aln = $alignin->next_aln;  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $jcmatrix = $stats->distance(-align => $aln,   | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                   -method => 'Jukes-Cantor');  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print $jcmatrix->print_matrix;  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ## and for measurements of synonymous /nonsynonymous substitutions ##  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $in = Bio::AlignIO->new(-format => 'fasta',  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             -file   => 't/data/nei_gojobori_test.aln');  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $alnobj = $in->next_aln;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my ($seq1id,$seq2id) = map { $_->display_id } $alnobj->each_seq;  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $results = $stats->calc_KaKs_pair($alnobj, $seq1id, $seq2id);  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "comparing ".$results->[0]{'Seq1'}." and ".$results->[0]{'Seq2'}."\n";  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (sort keys %{$results->[0]} ){  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       next if /Seq/;  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       printf("%-9s %.4f \n",$_ , $results->[0]{$_});  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $results2 = $stats->calc_all_KaKs_pairs($alnobj);  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for my $an (@$results2){  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       print "comparing ". $an->{'Seq1'}." and ". $an->{'Seq2'}. " \n";  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (sort keys %$an ){  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  next if /Seq/;  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  printf("%-9s %.4f \n",$_ , $an->{$_});  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       print "\n\n";  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $result3 = $stats->calc_average_KaKs($alnobj, 1000);  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (sort keys %$result3 ){  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       next if /Seq/;  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       printf("%-9s %.4f \n",$_ , $result3->{$_});  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This object contains routines for calculating various statistics and  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 distances for DNA alignments.  The routines are not well tested and do  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 contain errors at this point.  Work is underway to correct them, but  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 do not expect this code to give you the right answer currently!  Use  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 dnadist/distmat in the PHLYIP or EMBOSS packages to calculate the  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 distances.  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Several different distance method calculations are supported.  Listed  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in brackets are the pattern which will match  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 3  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 JukesCantor [jc|jukes|jukescantor|jukes-cantor]  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Uncorrected [jcuncor|uncorrected]  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 F81 [f81|felsenstein]  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Kimura [k2|k2p|k80|kimura]  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Tamura [t92|tamura|tamura92]  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 F84 [f84|felsenstein84]  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 TajimaNei [tajimanei|tajima\-nei]  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 JinNei [jinnei|jin\-nei] (not implemented)  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There are also three methods to calculate the ratio of synonymous to  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 non-synonymous mutations.  All are implementations of the Nei-Gojobori  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 evolutionary pathway method and use the Jukes-Cantor method of  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 nucleotide substitution. This method works well so long as the  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 nucleotide frequencies are roughly equal and there is no significant  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 transition/transversion bias.  In order to use these methods there are  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 several pre-requisites for the alignment.  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 3  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 1  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 DNA alignment must be based on protein alignment. Use the subroutine  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L to achieve this.  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 2  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Therefore alignment gaps must be in multiples of 3 (representing an aa  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 deletion/insertion) and at present must be indicated by a '-' symbol.  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 3  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Alignment must be solely of coding region and be in reading frame 0 to  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 achieve meaningful results  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 4  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Alignment must therefore be a multiple of 3 nucleotides long.  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 5  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All sequences must be the same length (including gaps). This should be  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the case anyway if the sequences have been automatically aligned using  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a program like Clustal.  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 6  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Only the standard codon alphabet is supported at present.  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calc_KaKs_pair() calculates a number of statistics for a named pair of  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequences in the alignment.  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calc_all_KaKs_pairs() calculates these statistics for all pairwise  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 comparisons in an MSA.  The statistics returned are:  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 3  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 S_d - Number of synonymous mutations between the 2 sequences.  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 N_d - Number of non-synonymous mutations between the 2 sequences.  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 S -  Mean number of  synonymous sites in both sequences.  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 N -  mean number of  synonymous sites in both sequences.  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 P_s - proportion of synonymous differences in both sequences given by  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 P_s = S_d/S.  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 P_n - proportion of non-synonymous differences in both sequences given  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by P_n = S_n/S.  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_s - estimation of synonymous mutations per synonymous site (by  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Jukes-Cantor).  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_n - estimation of non-synonymous mutations per non-synonymous site (by  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Jukes-Cantor).  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_n_var - estimation of variance of D_n .  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_s_var - estimation of variance of S_n.  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 z_value - calculation of z value.Positive value indicates D_n E D_s,  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 negative value indicates D_s E D_n.  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The statistics returned by calc_average_KaKs are:  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 3  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_s - Average number of synonymous mutations/synonymous site.  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_n - Average number of non-synonymous mutations/non-synonymous site.  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_s_var - Estimated variance of Ds from bootstrapped alignments.  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_n_var - Estimated variance of Dn from bootstrapped alignments.  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 z_score - calculation of z value. Positive value indicates D_n ED_s,  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 negative values vice versa.  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The design of the code is based around the explanation of the  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Nei-Gojobori algorithm in the excellent book "Molecular Evolution and  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Phylogenetics" by Nei and Kumar, published by Oxford University  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Press. The methods have been tested using the worked example 4.1 in  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the book, and reproduce those results. If people like having this sort  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of analysis in BioPerl other methods for estimating Ds and Dn can be  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 provided later.  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Much of the DNA distance code is based on implementations in EMBOSS  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (Rice et al, www.emboss.org) [distmat.c] and PHYLIP (J. Felsenstein et  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 al) [dnadist.c].  Insight also gained from Eddy, Durbin, Krogh, &  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Mitchison.  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 REFERENCES  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 3  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_JukesCantor   | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "Phylogenetic Inference", Swoffrod, Olsen, Waddell and Hillis, in  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Mol. Systematics, 2nd ed, 1996, Ch 11.  Derived from "Evolution of  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Protein Molecules", Jukes & Cantor, in Mammalian Prot. Metab., III,  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1969, pp. 21-132.  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_Tamura  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 K Tamura, Mol. Biol. Evol. 1992, 9, 678.  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_Kimura   | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 M Kimura, J. Mol. Evol., 1980, 16, 111.  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 JinNei   | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Jin and Nei, Mol. Biol. Evol. 82, 7, 1990.  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 D_TajimaNei  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Tajima and Nei, Mol. Biol. Evol. 1984, 1, 269.  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 FEEDBACK  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Mailing Lists  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 User feedback is an integral part of the evolution of this and other  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bioperl modules. Send your comments and suggestions preferably to  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the Bioperl mailing list.  Your participation is much appreciated.  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bioperl-l@bioperl.org                  - General discussion  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Support   | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Please direct usage questions or support issues to the mailing list:  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 rather than to the module maintainer directly. Many experienced and   | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reponsive experts will be able look at the problem and quickly   | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 address it. Please include a thorough description of the problem   | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with code and data examples if at all possible.  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Reporting Bugs  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Report bugs to the Bioperl bug tracking system to help us keep track  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the bugs and their resolution. Bug reports can be submitted via the  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 web:  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   https://github.com/bioperl/bioperl-live/issues  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR - Jason Stajich  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Email jason-AT-bioperl.org  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 CONTRIBUTORS  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Richard Adams, richard.adams@ed.ac.uk  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 APPENDIX  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The rest of the documentation details each of the object methods.  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Internal methods are usually preceded with a _  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Let the code begin...  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::Align::DNAStatistics;  | 
| 
338
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
361
 | 
 use vars qw(%DNAChanges @Nucleotides %NucleotideIndexes  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    $GapChars $SeqCount $DefaultGapPenalty %DistanceMethods  | 
| 
340
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1166
 | 
             $CODONS %synchanges $synsites $Precision $GCChhars);  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
341
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
22
 | 
 use strict;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
    | 
| 
342
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
907
 | 
 use Bio::Align::PairwiseStatistics;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
98
 | 
    | 
| 
343
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
1013
 | 
 use Bio::Matrix::PhylipDist;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
107
 | 
    | 
| 
344
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
580
 | 
 use Bio::Tools::IUPAC;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
453
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BEGIN {  | 
| 
347
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
15
 | 
     $GapChars = '[\.\-]';  | 
| 
348
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     $GCChhars = '[GCS]';  | 
| 
349
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     @Nucleotides = qw(A G T C);  | 
| 
350
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $SeqCount = 2;  | 
| 
351
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $Precision = 5;  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # these values come from EMBOSS distmat implementation  | 
| 
354
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     %NucleotideIndexes = ( 'A' => 0,  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'T' => 1,  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'C' => 2,  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'G' => 3,  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'AT' => 0,  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'AC' => 1,  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'AG' => 2,  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'CT' => 3,  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'GT' => 4,  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   'CG' => 5,  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # these are wrong now  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'S' => [ 1, 3],  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'W' => [ 0, 4],  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'Y' => [ 2, 3],  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'R' => [ 0, 1],  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'M' => [ 0, 3],  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'K' => [ 1, 2],  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'B' => [ 1, 2, 3],  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'H' => [ 0, 2, 3],  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'V' => [ 0, 1, 3],  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			   'D' => [ 0, 1, 2],  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   );  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
379
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $DefaultGapPenalty = 0;  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # could put ambiguities here?  | 
| 
381
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     %DNAChanges = ( 'Transversions' => { 'A' => [ 'T', 'C'],  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 'T' => [ 'A', 'G'],  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 'C' => [ 'A', 'G'],  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 'G' => [ 'C', 'T'],  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				     },  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    'Transitions'   => { 'A' => [ 'G' ],  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 'G' => [ 'A' ],  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 'C' => [ 'T' ],  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 'T' => [ 'C' ],  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				     },  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    );  | 
| 
392
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91
 | 
     %DistanceMethods = ( 'jc|jukes|jukescantor|jukes\-cantor' => 'JukesCantor',  | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 'jcuncor|uncorrected'   => 'Uncorrected',  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 'f81|felsenstein81'     => 'F81',  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 'k2|k2p|k80|kimura'     => 'Kimura',  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 't92|tamura|tamura92'   => 'Tamura',  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 'f84|felsenstein84'     => 'F84',  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 'tajimanei|tajima\-nei' => 'TajimaNei',  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 'jinnei|jin\-nei'       => 'JinNei');  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
402
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
22
 | 
 use base qw(Bio::Root::Root Bio::Align::StatisticsI);  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21812
 | 
    | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## generate look up hashes for Nei_Gojobori methods##  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $CODONS = get_codons();  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my @t = split '', "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG";  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #create look up hash of number of possible synonymous mutations per codon  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $synsites = get_syn_sites();  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #create reference look up hash of single basechanges in codons  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 %synchanges = get_syn_changes();  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : new  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $obj = Bio::Align::DNAStatistics->new();  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Builds a new Bio::Align::DNAStatistics object   | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : Bio::Align::DNAStatistics  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : none  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {   | 
| 
426
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
617
 | 
     my ($class,@args) = @_;  | 
| 
427
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     my $self = $class->SUPER::new(@args);  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
429
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $self->pairwise_stats( Bio::Align::PairwiseStatistics->new());  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
431
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return $self;  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 distance  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : distance  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $distance_mat = $stats->distance(-align  => $aln,   | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 			       -method => $method);  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates a distance matrix for all pairwise distances of  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            sequences in an alignment.  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L object  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : -align  => Bio::Align::AlignI object  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            -method => String specifying specific distance method   | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       (implementing class may assume a default)  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See also: L  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub distance{  | 
| 
451
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
  
1
  
 | 
100
 | 
    my ($self,@args) = @_;  | 
| 
452
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
    my ($aln,$method) = $self->_rearrange([qw(ALIGN METHOD)],@args);  | 
| 
453
 | 
13
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
112
 | 
    if( ! defined $aln || ! ref ($aln) || ! $aln->isa('Bio::Align::AlignI') ) {   | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
454
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $self->throw("Must supply a valid Bio::Align::AlignI for the -align parameter in distance");  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
456
 | 
13
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
33
 | 
    $method ||= 'JukesCantor';  | 
| 
457
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
    foreach my $m ( keys %DistanceMethods ) {  | 
| 
458
 | 
58
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
1455
 | 
        if(defined $m &&  $method =~ /$m/i ) {  | 
| 
459
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
 	   my $mtd = "D_$DistanceMethods{$m}";  | 
| 
460
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
 	   return $self->$mtd($aln);  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
463
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    $self->warn("Unrecognized distance method $method must be one of [".  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       join(',',$self->available_distance_methods())."]");  | 
| 
465
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    return;  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 available_distance_methods  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : available_distance_methods  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my @methods = $stats->available_distance_methods();  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Enumerates the possible distance methods  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : Array of strings  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : none  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub available_distance_methods{  | 
| 
480
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
    my ($self,@args) = @_;  | 
| 
481
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    return values %DistanceMethods;  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D - distance methods  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_JukesCantor  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_JukesCantor  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stat->D_JukesCantor($aln)  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates D (pairwise distance) between 2 sequences in an   | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            alignment using the Jukes-Cantor 1 parameter model.   | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L of DNA sequences  | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            double - gap penalty  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_JukesCantor{  | 
| 
504
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
7
 | 
    my ($self,$aln,$gappenalty) = @_;  | 
| 
505
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
506
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    $gappenalty = $DefaultGapPenalty unless defined $gappenalty;  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
508
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    my (@seqs,@names,@values,%dist);  | 
| 
509
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    my $seqct = 0;  | 
| 
510
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
511
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
        push @names, $seq->display_id;  | 
| 
512
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
        push @seqs, uc $seq->seq();  | 
| 
513
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
        $seqct++;  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
515
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
516
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    for(my $i = 0; $i < $seqct-1; $i++ ) {  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # (diagonals) distance is 0 for same sequence  | 
| 
518
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
519
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
        $values[$i][$i] = sprintf($precisionstr,0);          | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
521
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
        for( my $j = $i+1; $j < $seqct; $j++ ) {  | 
| 
522
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 							       $seqs[$j]);  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # just want diagonals  | 
| 
525
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	   my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] +   | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     $matrix->[2]->[2] + $matrix->[3]->[3] );  | 
| 
527
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	   my $D = 1 - ( $m / ($aln->length - $gaps + ( $gaps * $gappenalty)));  | 
| 
528
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	   my $d = (- 3 / 4) * log ( 1 - (4 * $D/ 3));  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # fwd and rev lookup  | 
| 
530
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];  | 
| 
531
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];	     | 
| 
532
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
 	   $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # (diagonals) distance is 0 for same sequence  | 
| 
534
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];     | 
| 
535
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	   $values[$j][$j] = sprintf($precisionstr,0);  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
538
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -matrix  => \%dist,  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -names   => \@names,  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -values  => \@values);     | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_F81  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_F81  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stat->D_F81($aln)  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates D (pairwise distance) between 2 sequences in an   | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            alignment using the Felsenstein 1981 distance model.   | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            Relaxes the assumption of equal base frequencies that is  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            in JC.  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L of DNA sequences  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_F81{  | 
| 
559
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
7
 | 
    my ($self,$aln,$gappenalty) = @_;  | 
| 
560
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
561
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    $gappenalty = $DefaultGapPenalty unless defined $gappenalty;  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
563
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    my (@seqs,@names,@values,%dist);  | 
| 
564
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    my $seqct = 0;  | 
| 
565
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
566
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
        push @names, $seq->display_id;;  | 
| 
567
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
        push @seqs, uc $seq->seq();  | 
| 
568
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
        $seqct++;  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
570
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
571
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    for(my $i = 0; $i < $seqct-1; $i++ ) {  | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # (diagonals) distance is 0 for same sequence  | 
| 
573
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
574
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
        $values[$i][$i] = sprintf($precisionstr,0);          | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
576
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
        for( my $j = $i+1; $j < $seqct; $j++ ) {  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     | 
| 
578
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	   my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 							       $seqs[$j]);  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # just want diagonals  | 
| 
581
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	   my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] +   | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     $matrix->[2]->[2] + $matrix->[3]->[3] );  | 
| 
583
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	   my $D = 1 - ( $m / ($aln->length - $gaps + ( $gaps * $gappenalty)));  | 
| 
584
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	   my $d = (- 3 / 4) * log ( 1 - (4 * $D/ 3));  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # fwd and rev lookup  | 
| 
586
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];  | 
| 
587
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];	     | 
| 
588
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	   $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # (diagonals) distance is 0 for same sequence  | 
| 
590
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];	     | 
| 
591
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	   $values[$j][$j] = sprintf($precisionstr,0);   | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
594
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -matrix  => \%dist,  | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -names   => \@names,  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -values  => \@values);     | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_Uncorrected  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_Uncorrected  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stats->D_Uncorrected($aln)  | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculate a distance D, no correction for multiple substitutions   | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            is used.  In rare cases where sequences may not overlap, 'NA' is  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            substituted for the distance.  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L (DNA Alignment)  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            [optional] gap penalty  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_Uncorrected {  | 
| 
614
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
10
 | 
    my ($self,$aln,$gappenalty) = @_;  | 
| 
615
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    $gappenalty = $DefaultGapPenalty unless defined $gappenalty;  | 
| 
616
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
618
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    my (@seqs,@names,@values,%dist);  | 
| 
619
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    my $seqct = 0;  | 
| 
620
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
621
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
        push @names, $seq->display_id;  | 
| 
622
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
        push @seqs, uc $seq->seq();  | 
| 
623
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
        $seqct++;  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
626
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
627
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    my $len = $aln->length;  | 
| 
628
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    for( my $i = 0; $i < $seqct-1; $i++ ) {  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # (diagonals) distance is 0 for same sequence  | 
| 
630
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
631
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
        $values[$i][$i] = sprintf($precisionstr,0);  | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | 
| 
633
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
        for( my $j = $i+1; $j < $seqct; $j++ ) {  | 
| 
634
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 	   my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 							       $seqs[$j]);  | 
| 
636
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
 	   my $m = ( $matrix->[0]->[0] +   | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     $matrix->[1]->[1] +  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     $matrix->[2]->[2] +  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     $matrix->[3]->[3] );  | 
| 
640
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
        my $denom = ( $len - $gaps + ( $gaps * $gappenalty));  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | 
| 
642
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
36
 | 
        $self->warn("No distance calculated between $names[$i] and $names[$j], inserting -1")  | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             unless $denom;  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | 
| 
645
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
 	   my $D = $denom ? 1 - ( $m / $denom) : -1;  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # fwd and rev lookup  | 
| 
647
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
 	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];  | 
| 
648
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];  | 
| 
649
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
64
 | 
 	   $values[$j][$i] = $values[$i][$j] = $denom ? sprintf($precisionstr,$D)  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                   : sprintf("%-*s", $Precision + 2, $D);  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # (diagonals) distance is 0 for same sequence  | 
| 
652
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
 	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];	     | 
| 
653
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
 	   $values[$j][$j] = sprintf($precisionstr,0);   | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
656
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -matrix  => \%dist,  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -names   => \@names,  | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -values  => \@values);   | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # M Kimura, J. Mol. Evol., 1980, 16, 111.  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_Kimura  | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_Kimura  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stat->D_Kimura($aln)  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates D (pairwise distance) between all pairs of sequences   | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            in an alignment using the Kimura 2 parameter model.  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L of DNA sequences  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_Kimura {  | 
| 
678
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
6
 | 
    my ($self,$aln) = @_;  | 
| 
679
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
681
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    my (@names,@values,%dist);  | 
| 
682
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    my $seqct = 0;  | 
| 
683
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
684
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
        push @names, $seq->display_id;  | 
| 
685
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
        $seqct++;  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
688
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
690
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    for( my $i = 0; $i < $seqct-1; $i++ ) {  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # (diagonals) distance is 0 for same sequence  | 
| 
692
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
693
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
        $values[$i][$i] = sprintf($precisionstr,0);  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
695
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
        for( my $j = $i+1; $j < $seqct; $j++ ) {  | 
| 
696
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	   my $pairwise = $aln->select_noncont($i+1,$j+1);  | 
| 
697
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	   my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);  | 
| 
698
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   unless( $L ) {   | 
| 
699
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $L = 1;  | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
701
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	   my $P = $self->transitions($pairwise) / $L;  | 
| 
702
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	   my $Q = $self->transversions($pairwise) / $L;  | 
| 
703
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   my $K = 0;  | 
| 
704
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   my $denom = ( 1 - (2 * $P) - $Q);  | 
| 
705
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   if( $denom == 0 ) {  | 
| 
706
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $self->throw("cannot find distance for ",$i+1,  | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			    ",",$j+1," $P, $Q\n");  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
709
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   my $a = 1 / ( 1 - (2 * $P) - $Q);  | 
| 
710
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	   my $b = 1 / ( 1 - 2 * $Q );  | 
| 
711
 | 
2
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
10
 | 
 	   if( $a < 0 || $b < 0 ) {   | 
| 
712
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $K = -1;  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   } else{   | 
| 
714
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	       $K = (1/2) * log ( $a ) + (1/4) * log($b);  | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # fwd and rev lookup  | 
| 
717
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];  | 
| 
718
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];	     | 
| 
719
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
 	   $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$K);  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # (diagonals) distance is 0 for same sequence  | 
| 
721
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];	     | 
| 
722
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 	   $values[$j][$j] = sprintf($precisionstr,0);   | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
725
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -matrix  => \%dist,  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -names   => \@names,  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -values  => \@values);   | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_Kimura_variance  | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_Kimura  | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stat->D_Kimura_variance($aln)  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates D (pairwise distance) between all pairs of sequences   | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            in an alignment using the Kimura 2 parameter model.  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : array of 2 L,  | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            the first is the Kimura distance and the second is  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            a matrix of variance V(K)  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L of DNA sequences  | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_Kimura_variance {  | 
| 
747
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
    my ($self,$aln) = @_;  | 
| 
748
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
750
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    my (@names,@values,%dist,@var);  | 
| 
751
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    my $seqct = 0;  | 
| 
752
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
753
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        push @names, $seq->display_id;  | 
| 
754
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $seqct++;  | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
757
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
759
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    for( my $i = 0; $i < $seqct-1; $i++ ) {  | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # (diagonals) distance is 0 for same sequence  | 
| 
761
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
762
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $values[$i][$i] = sprintf($precisionstr,0);  | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
764
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        for( my $j = $i+1; $j < $seqct; $j++ ) {  | 
| 
765
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   my $pairwise = $aln->select_noncont($i+1,$j+1);  | 
| 
766
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);  | 
| 
767
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   unless( $L ) {   | 
| 
768
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $L = 1;  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
770
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   my $P = $self->transitions($pairwise) / $L;  | 
| 
771
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   my $Q = $self->transversions($pairwise) / $L;  | 
| 
772
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   my ($a,$b,$K,$var_k);  | 
| 
773
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   my $a_denom = ( 1 - (2 * $P) - $Q);  | 
| 
774
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   my $b_denom = 1 - 2 * $Q;  | 
| 
775
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	   unless( $a_denom > 0 && $b_denom > 0 ) {  | 
| 
776
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $a = 1;  | 
| 
777
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $b = 1;  | 
| 
778
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $K = -1;  | 
| 
779
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $var_k = -1;  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   } else {   | 
| 
781
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $a = 1 / $a_denom;  | 
| 
782
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $b = 1 / $b_denom;  | 
| 
783
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $K = (1/2) * log ( $a ) + (1/4) * log($b);  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       # from Wu and Li 1985 which in turn is from Kimura 1980  | 
| 
785
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       my $c = ( $a - $b ) / 2;  | 
| 
786
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       my $d = ( $a + $b ) / 2;  | 
| 
787
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $var_k = ( $a**2 * $P + $d**2 * $Q - ( $a * $P + $d * $Q)**2 ) / $L;  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # fwd and rev lookup  | 
| 
791
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];  | 
| 
792
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];	     | 
| 
793
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$K);  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # (diagonals) distance is 0 for same sequence  | 
| 
795
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];     | 
| 
796
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $values[$j]->[$j] = sprintf($precisionstr,0);   | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     | 
| 
798
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $var[$j]->[$i] = $var[$i]->[$j] = sprintf($precisionstr,$var_k);  | 
| 
799
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $var[$j]->[$j] = $values[$j]->[$j];  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
802
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    return ( Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 -matrix  => \%dist,  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 -names   => \@names,  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 -values  => \@values),  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 -matrix  => \%dist,  | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 -names   => \@names,  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 -values  => \@var)  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    );  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  K Tamura, Mol. Biol. Evol. 1992, 9, 678.  | 
| 
815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_Tamura  | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_Tamura  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : Calculates D (pairwise distance) between 2 sequences in an   | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            alignment using Tamura 1992 distance model.   | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L of DNA sequences  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_Tamura {  | 
| 
828
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
8
 | 
    my ($self,$aln) = @_;  | 
| 
829
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
831
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    my (@seqs,@names,@values,%dist,$i,$j);  | 
| 
832
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    my $seqct = 0;  | 
| 
833
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    my $length = $aln->length;  | 
| 
834
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
835
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
        push @names, $seq->display_id;;  | 
| 
836
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
        push @seqs, uc $seq->seq();  | 
| 
837
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
        $seqct++;  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
840
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
841
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    my (@gap,@gc,@trans,@tranv,@score);  | 
| 
842
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    $i = 0;  | 
| 
843
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    for my $t1 ( @seqs ) {  | 
| 
844
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
        $j = 0;  | 
| 
845
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
        for my $t2 ( @seqs ) {  | 
| 
846
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	   $gap[$i][$j] = 0;  | 
| 
847
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	   for( my $k = 0; $k < $length; $k++ ) {  | 
| 
848
 | 
1532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2373
 | 
 	       my ($c1,$c2) = ( substr($seqs[$i],$k,1),  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				substr($seqs[$j],$k,1) );  | 
| 
850
 | 
1532
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
6414
 | 
 	       if( $c1 =~ /^$GapChars$/ ||  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   $c2 =~ /^$GapChars$/ ) {  | 
| 
852
 | 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
231
 | 
 		   $gap[$i][$j]++;	  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       } elsif( $c2 =~ /^$GCChhars$/i ) {  | 
| 
854
 | 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1728
 | 
 		   $gc[$i][$j]++;  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       }   | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
857
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	   $gc[$i][$j] = ( $gc[$i][$j] /   | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   ($length - $gap[$i][$j]) );  | 
| 
859
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	   $j++;  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
861
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
        $i++;  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
864
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    for( $i = 0; $i < $seqct-1; $i++ ) {  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # (diagonals) distance is 0 for same sequence  | 
| 
866
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
867
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
        $values[$i][$i] = sprintf($precisionstr,0);  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | 
| 
869
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
        for( $j = $i+1; $j < $seqct; $j++ ) {  | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     | 
| 
871
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	   my $pairwise = $aln->select_noncont($i+1,$j+1);  | 
| 
872
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   my $L = $self->pairwise_stats->number_of_comparable_bases($pairwise);  | 
| 
873
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	   my $P = $self->transitions($pairwise) / $L;  | 
| 
874
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	   my $Q = $self->transversions($pairwise) / $L;  | 
| 
875
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	   my $C = $gc[$i][$j] + $gc[$j][$i]-   | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       ( 2 * $gc[$i][$j] * $gc[$j][$i] );  | 
| 
877
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   if( $P ) {  | 
| 
878
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	       $P = $P / $C;  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
880
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	   my $d = -($C * log(1- $P - $Q)) -(0.5* ( 1 - $C) * log(1 - 2 * $Q));  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # fwd and rev lookup  | 
| 
882
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];  | 
| 
883
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];	     | 
| 
884
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	   $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # (diagonals) distance is 0 for same sequence  | 
| 
886
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];  | 
| 
887
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	   $values[$j][$j] = sprintf($precisionstr,0);   | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
890
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -matrix  => \%dist,  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -names   => \@names,  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -values  => \@values);   | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_F84  | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_F84  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stat->D_F84($aln)  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates D (pairwise distance) between 2 sequences in an   | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            alignment using the Felsenstein 1984 distance model.   | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L of DNA sequences  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            [optional] double - gap penalty  | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_F84 {  | 
| 
910
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
    my ($self,$aln,$gappenalty) = @_;  | 
| 
911
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
912
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    $self->throw_not_implemented();  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
914
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    my (@seqs,@names,@values,%dist);  | 
| 
915
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    my $seqct = 0;  | 
| 
916
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # if there is no name,   | 
| 
918
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        my $id = $seq->display_id;  | 
| 
919
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
        if( ! length($id) ||       # deal with empty names  | 
| 
920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   $id =~ /^\s+$/ ) {  | 
| 
921
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	   $id = $seqct+1;  | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
923
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        push @names, $id;  | 
| 
924
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        push @seqs, uc $seq->seq();  | 
| 
925
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $seqct++;  | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
928
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
930
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    for( my $i = 0; $i < $seqct-1; $i++ ) {  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # (diagonals) distance is 0 for same sequence  | 
| 
932
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
933
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        $values[$i][$i] = sprintf($precisionstr,0);  | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
935
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
        for( my $j = $i+1; $j < $seqct; $j++ ) {  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }     | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Tajima and Nei, Mol. Biol. Evol. 1984, 1, 269.  | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  Tajima-Nei correction used for multiple substitutions in the calc  | 
| 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # of the distance matrix. Nucleic acids only.  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  D = p-distance = 1 - (matches/(posns_scored + gaps)  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  distance = -b * ln(1-D/b)  | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_TajimaNei  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_TajimaNei  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stat->D_TajimaNei($aln)  | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates D (pairwise distance) between 2 sequences in an   | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            alignment using the TajimaNei 1984 distance model.   | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : Bio::Align::AlignI of DNA sequences  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_TajimaNei{  | 
| 
962
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
8
 | 
    my ($self,$aln) = @_;  | 
| 
963
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    return 0 unless $self->_check_arg($aln);  | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # ambiguities ignored at this point  | 
| 
965
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    my (@seqs,@names,@values,%dist);  | 
| 
966
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    my $seqct = 0;  | 
| 
967
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    foreach my $seq ( $aln->each_seq) {  | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # if there is no name,   | 
| 
969
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
        push @names, $seq->display_id;  | 
| 
970
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
        push @seqs, uc $seq->seq();  | 
| 
971
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
        $seqct++;  | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
973
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    my $precisionstr = "%.$Precision"."f";  | 
| 
974
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    my ($i,$j,$bs);  | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # pairwise  | 
| 
976
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    for( $i =0; $i < $seqct -1; $i++ ) {  | 
| 
977
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
        $dist{$names[$i]}->{$names[$i]} = [$i,$i];  | 
| 
978
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
        $values[$i][$i] = sprintf($precisionstr,0);  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
980
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
        for ( $j = $i+1; $j <$seqct;$j++ ) {  | 
| 
981
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	   my ($matrix,$pfreq,$gaps) = $self->_build_nt_matrix($seqs[$i],  | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 							       $seqs[$j]);  | 
| 
983
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	   my $pairwise = $aln->select_noncont($i+1,$j+1);  | 
| 
984
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   my $slen = $self->pairwise_stats->number_of_comparable_bases($pairwise);	      | 
| 
985
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	   my $fij2 = 0;  | 
| 
986
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   for( $bs = 0; $bs < 4; $bs++ ) {  | 
| 
987
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	       my $fi = 0;  | 
| 
988
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	       map {$fi += $matrix->[$bs]->[$_] } 0..3;  | 
| 
 
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
    | 
| 
989
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	       my $fj = 0;  | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       # summation   | 
| 
991
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 	       map { $fj += $matrix->[$_]->[$bs] } 0..3;  | 
| 
 
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
    | 
| 
992
 | 
8
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
27
 | 
 	       my $fij = ( $fi && $fj ) ? ($fi + $fj) /( 2 * $slen) : 0;  | 
| 
993
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	       $fij2 += $fij**2;  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     | 
| 
996
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	   my ($pair,$h) = (0,0);  | 
| 
997
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   for( $bs = 0; $bs < 3; $bs++ ) {  | 
| 
998
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	       for(my $bs1 = $bs+1; $bs1 <= 3; $bs1++ ) {  | 
| 
999
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 		   my $fij = $pfreq->[$pair++] / $slen;  | 
| 
1000
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 		   if( $fij ) {  | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		         | 
| 
1002
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		       my ($ci1,$ci2,$cj1,$cj2) = (0,0,0,0);  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1004
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		       map { $ci1 += $matrix->[$_]->[$bs] } 0..3;  | 
| 
 
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
    | 
| 
1005
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		       map { $cj1 += $matrix->[$bs]->[$_] } 0..3;  | 
| 
 
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
    | 
| 
1006
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		       map { $ci2 += $matrix->[$_]->[$bs1] } 0..3;  | 
| 
 
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
    | 
| 
1007
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 		       map { $cj2 += $matrix->[$bs1]->[$_] } 0..3;  | 
| 
 
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
    | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		         | 
| 
1009
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		       if( $fij ) {  | 
| 
1010
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 			   $h += ( ($fij**2) / 2 ) /   | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			       (  ( ( $ci1 + $cj1 ) / (2 * $slen) ) *  | 
| 
1012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  ( ( $ci2 + $cj2 ) / (2 * $slen) )   | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  );  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		       }  | 
| 
1015
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
 		       $self->debug( "slen is $slen h is $h fij = $fij ci1 =$ci1 cj1=$cj1 ci2=$ci2 cj2=$cj2\n");  | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   }  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       }  | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # just want diagonals which are matches (A matched A, C -> C)  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1021
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   my $m = ( $matrix->[0]->[0] + $matrix->[1]->[1] +   | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     $matrix->[2]->[2] + $matrix->[3]->[3] );  | 
| 
1023
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	   my $D = 1 - ( $m / $slen);  | 
| 
1024
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	   my $d;  | 
| 
1025
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	   if( $h == 0 ) {  | 
| 
1026
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	       $d = -1;  | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   } else {  | 
| 
1028
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	       my $b = (1 - $fij2 + (($D**2)/$h)) / 2;  | 
| 
1029
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	       my $c = 1- $D/ $b;  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1031
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	       if( $c < 0 ) {  | 
| 
1032
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		   $d = -1;  | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       } else {   | 
| 
1034
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		   $d = (-1 * $b) * log ( $c);  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       }  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   # fwd and rev lookup  | 
| 
1038
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	   $dist{$names[$i]}->{$names[$j]} = [$i,$j];  | 
| 
1039
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   $dist{$names[$j]}->{$names[$i]} = [$i,$j];	     | 
| 
1040
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	   $values[$j][$i] = $values[$i][$j] = sprintf($precisionstr,$d);  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # (diagonals) distance is 0 for same sequence  | 
| 
1043
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	   $dist{$names[$j]}->{$names[$j]} = [$j,$j];	     | 
| 
1044
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	   $values[$j][$j] = sprintf($precisionstr,0);   | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        }  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
1047
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
    return Bio::Matrix::PhylipDist->new(-program => 'bioperl_DNAstats',  | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -matrix  => \%dist,  | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -names   => \@names,  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       -values  => \@values);   | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Jin and Nei, Mol. Biol. Evol. 82, 7, 1990.  | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 D_JinNei  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : D_JinNei  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $d = $stat->D_JinNei($aln)  | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates D (pairwise distance) between 2 sequences in an   | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            alignment using the Jin-Nei 1990 distance model.   | 
| 
1062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : L  | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : L of DNA sequences  | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub D_JinNei{  | 
| 
1069
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
    my ($self,@args) = @_;  | 
| 
1070
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    $self->warn("JinNei implementation not completed");  | 
| 
1071
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    return;  | 
| 
1072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 transversions  | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : transversions  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $transversions = $stats->transversion($aln);  | 
| 
1078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates the number of transversions between two sequences in   | 
| 
1079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            an alignment  | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : integer  | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : Bio::Align::AlignI  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub transversions{  | 
| 
1087
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
480
 | 
    my ($self,$aln) = @_;  | 
| 
1088
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    return $self->_trans_count_helper($aln, $DNAChanges{'Transversions'});  | 
| 
1089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 transitions  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : transitions  | 
| 
1094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $transitions = Bio::Align::DNAStatistics->transitions($aln);  | 
| 
1095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Calculates the number of transitions in a given DNA alignment  | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : integer representing the number of transitions  | 
| 
1097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : Bio::Align::AlignI object  | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub transitions{  | 
| 
1103
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
19
 | 
    my ($self,$aln) = @_;  | 
| 
1104
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
    return $self->_trans_count_helper($aln, $DNAChanges{'Transitions'});  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _trans_count_helper {  | 
| 
1109
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
 
 | 
28
 | 
     my ($self,$aln,$type) = @_;  | 
| 
1110
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     return 0 unless( $self->_check_arg($aln) );  | 
| 
1111
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     if( ! $aln->is_flush ) { $self->throw("must be flush") }  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1112
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     my (@tcount);  | 
| 
1113
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     my ($first,$second) = ( uc $aln->get_seq_by_pos(1)->seq(),  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			    uc $aln->get_seq_by_pos(2)->seq() );  | 
| 
1115
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     my $alen = $aln->length;   | 
| 
1116
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     for (my $i = 0;$i<$alen; $i++ ) {   | 
| 
1117
 | 
2298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2697
 | 
 	my ($c1,$c2) = ( substr($first,$i,1),  | 
| 
1118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 substr($second,$i,1) );  | 
| 
1119
 | 
2298
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3263
 | 
 	if( $c1 ne $c2 ) {   | 
| 
1120
 | 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
707
 | 
 	    foreach my $nt ( @{$type->{$c1}} ) {  | 
| 
 
 | 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
677
 | 
    | 
| 
1121
 | 
477
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
695
 | 
 		if( $nt eq $c2) {  | 
| 
1122
 | 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
209
 | 
 		   $tcount[$i]++;  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       }  | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1127
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     my $sum = 0;  | 
| 
1128
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     map { if( $_) { $sum += $_} } @tcount;  | 
| 
 
 | 
2025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2286
 | 
    | 
| 
 
 | 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
183
 | 
    | 
| 
1129
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
     return $sum;  | 
| 
1130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # this will generate a matrix which records across the row, the number  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # of DNA subst   | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _build_nt_matrix {  | 
| 
1136
 | 
19
 | 
 
 | 
 
 | 
  
19
  
 | 
 
 | 
35
 | 
     my ($self,$seqa,$seqb) = @_;  | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1139
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
85
 | 
     my $basect_matrix = [ [ qw(0 0 0 0) ],  # number of bases that match  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [ qw(0 0 0 0) ],  | 
| 
1141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [ qw(0 0 0 0) ],  | 
| 
1142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [ qw(0 0 0 0) ] ];  | 
| 
1143
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     my $gaps = 0;                           # number of gaps  | 
| 
1144
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
     my $pfreq = [ qw( 0 0 0 0 0 0)];        # matrix for pair frequency  | 
| 
1145
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     my $len_a = length($seqa);  | 
| 
1146
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     for( my $i = 0; $i < $len_a; $i++) {  | 
| 
1147
 | 
1974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2649
 | 
 	my ($ti,$tj) = (substr($seqa,$i,1),substr($seqb,$i,1));  | 
| 
1148
 | 
1974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1863
 | 
 	$ti =~ tr/U/T/;  | 
| 
1149
 | 
1974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1704
 | 
 	$tj =~ tr/U/T/;  | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1151
 | 
1974
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3912
 | 
 	if( $ti =~ /^$GapChars$/) { $gaps++; next; }  | 
| 
 
 | 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
138
 | 
    | 
| 
 
 | 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
207
 | 
    | 
| 
1152
 | 
1823
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3330
 | 
 	if( $tj =~ /^$GapChars$/) { $gaps++; next }  | 
| 
 
 | 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
260
 | 
    | 
| 
 
 | 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
485
 | 
    | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1154
 | 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1642
 | 
 	my $ti_index = $NucleotideIndexes{$ti};		  | 
| 
1155
 | 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1343
 | 
 	my $tj_index = $NucleotideIndexes{$tj};	      | 
| 
1156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1157
 | 
1505
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1811
 | 
 	if( ! defined $ti_index ) {  | 
| 
1158
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    $self->warn("ti_index not defined for $ti\n");  | 
| 
1159
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    next;  | 
| 
1160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1162
 | 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1433
 | 
 	$basect_matrix->[$ti_index]->[$tj_index]++;  | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1164
 | 
1505
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2705
 | 
 	if( $ti ne $tj ) {  | 
| 
1165
 | 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
435
 | 
 	    $pfreq->[$NucleotideIndexes{join('',sort ($ti,$tj))}]++;  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1168
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     return ($basect_matrix,$pfreq,$gaps);  | 
| 
1169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _check_ambiguity_nucleotide {  | 
| 
1172
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     my ($base1,$base2) = @_;  | 
| 
1173
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my %iub = Bio::Tools::IUPAC->iupac_iub();  | 
| 
1174
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @amb1 = @{ $iub{uc($base1)} };  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1175
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @amb2 = @{ $iub{uc($base2)} };      | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1176
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my ($pmatch) = (0);  | 
| 
1177
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for my $amb ( @amb1 ) {  | 
| 
1178
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if( grep { $amb eq $_ } @amb2 ) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1179
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    $pmatch = 1;  | 
| 
1180
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    last;  | 
| 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1183
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if( $pmatch ) {   | 
| 
1184
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return (1 / scalar @amb1) * (1 / scalar @amb2);  | 
| 
1185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {   | 
| 
1186
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return 0;  | 
| 
1187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _check_arg {  | 
| 
1192
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
43
 | 
     my($self,$aln ) = @_;  | 
| 
1193
 | 
25
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
177
 | 
     if( ! defined $aln || ! $aln->isa('Bio::Align::AlignI') ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1194
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->warn("Must provide a Bio::Align::AlignI compliant object to Bio::Align::DNAStatistics");  | 
| 
1195
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return 0;  | 
| 
1196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif( $aln->get_seq_by_pos(1)->alphabet ne 'dna' ) {   | 
| 
1197
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->warn("Must provide a DNA alignment to Bio::Align::DNAStatistics, you provided a " . $aln->get_seq_by_pos(1)->alphabet);  | 
| 
1198
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return 0;  | 
| 
1199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1200
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
     return 1;  | 
| 
1201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Data Methods  | 
| 
1204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 pairwise_stats  | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : pairwise_stats  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $obj->pairwise_stats($newval)  | 
| 
1211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function:   | 
| 
1212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : value of pairwise_stats  | 
| 
1213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : newvalue (optional)  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub pairwise_stats{  | 
| 
1219
 | 
18
 | 
 
 | 
 
 | 
  
18
  
 | 
  
1
  
 | 
48
 | 
    my ($self,$value) = @_;  | 
| 
1220
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
48
 | 
    if( defined $value) {  | 
| 
1221
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
       $self->{'_pairwise_stats'} = $value;  | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1223
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
109
 | 
     return $self->{'_pairwise_stats'};  | 
| 
1224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 calc_KaKs_pair  | 
| 
1228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title    : calc_KaKs_pair  | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Useage   : my $results = $stats->calc_KaKs_pair($alnobj,  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $name1, $name2).  | 
| 
1232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function : calculates Nei-Gojobori statistics for pairwise   | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             comparison.  | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args     : A Bio::Align::AlignI compliant object such as a   | 
| 
1235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             Bio::SimpleAlign object, and 2 sequence name strings.  | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns  : a reference to a hash of statistics with keys as   | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             listed in Description.  | 
| 
1238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub calc_KaKs_pair {  | 
| 
1242
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
466
 | 
     my ( $self, $aln, $seq1_id, $seq2_id) = @_;  | 
| 
1243
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $self->throw("Needs 3 arguments - an alignment object, and 2 sequence ids")   | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if @_!= 4;  | 
| 
1245
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');  | 
| 
1246
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my @seqs = (  | 
| 
1247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#{id => $seq1_id, seq =>($aln->each_seq_with_id($seq1_id))[0]->seq},  | 
| 
1248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#{id => $seq2_id, seq =>($aln->each_seq_with_id($seq2_id))[0]->seq}  | 
| 
1249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{id => $seq1_id, seq => uc(($aln->each_seq_with_id($seq1_id))[0]->seq)},  | 
| 
1250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {id => $seq2_id, seq => uc(($aln->each_seq_with_id($seq2_id))[0]->seq)}  | 
| 
1251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       ) ;  | 
| 
1252
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     if (length($seqs[0]{'seq'}) != length($seqs[1]{'seq'})) {  | 
| 
1253
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->throw(" aligned sequences must be of equal length!");  | 
| 
1254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1255
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $results = [];  | 
| 
1256
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $self->_get_av_ds_dn(\@seqs, $results);  | 
| 
1257
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     return $results;  | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 calc_all_KaKs_pairs  | 
| 
1262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title    : calc_all_KaKs_pairs  | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Useage   : my $results2 = $stats->calc_KaKs_pair($alnobj).  | 
| 
1265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function : Calculates Nei_gojobori statistics for all pairwise  | 
| 
1266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             combinations in sequence.  | 
| 
1267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Arguments: A Bio::Align::ALignI compliant object such as  | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             a Bio::SimpleAlign object.  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns  : A reference to an array of hashes of statistics of  | 
| 
1270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             all pairwise comparisons in the alignment.  | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub calc_all_KaKs_pairs {  | 
| 
1277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #returns a multi_element_array with all pairwise comparisons  | 
| 
1278
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1162
 | 
 	my ($self,$aln) = @_;  | 
| 
1279
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	$self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');  | 
| 
1280
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my @seqs;  | 
| 
1281
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	for my $seq ($aln->each_seq) {  | 
| 
1282
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		push @seqs, {id => $seq->display_id, seq=>$seq->seq};  | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1284
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	my $results ;  | 
| 
1285
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$results = $self->_get_av_ds_dn(\@seqs, $results);  | 
| 
1286
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	return $results;  | 
| 
1287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 calc_average_KaKs  | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title    : calc_average_KaKs.    | 
| 
1292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Useage   : my $res= $stats->calc_average_KaKs($alnobj, 1000).  | 
| 
1293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function : calculates Nei_Gojobori stats for average of all   | 
| 
1294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             sequences in the alignment.  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args     : A Bio::Align::AlignI compliant object such as a  | 
| 
1296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             Bio::SimpleAlign object, number of bootstrap iterations  | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             (default 1000).  | 
| 
1298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns  : A reference to a hash of statistics as listed in Description.  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub calc_average_KaKs {  | 
| 
1303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #calculates global value for sequences in alignment using bootstrapping  | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #this is quite slow (~10 seconds per  3 X 200nt seqs);   | 
| 
1305
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1025
 | 
     my ($self, $aln, $bootstrap_rpt) = @_;  | 
| 
1306
 | 
1
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
4
 | 
     $bootstrap_rpt ||= 1000;  | 
| 
1307
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     $self->throw ("This calculation needs a Bio::Align::AlignI compatible object, not a [ " . ref($aln) . " ]object") unless $aln->isa('Bio::Align::AlignI');  | 
| 
1308
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
     my @seqs;  | 
| 
1309
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     for my $seq ($aln->each_seq) {  | 
| 
1310
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	push @seqs, {id => $seq->display_id, seq=>$seq->seq};  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1312
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $results ;  | 
| 
1313
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my ($ds_orig, $dn_orig) = $self->_get_av_ds_dn(\@seqs);  | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #print "ds = $ds_orig, dn = $dn_orig\n";  | 
| 
1315
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     $results = {D_s => $ds_orig, D_n => $dn_orig};  | 
| 
1316
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $self->_run_bootstrap(\@seqs, $results, $bootstrap_rpt);  | 
| 
1317
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     return $results;  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ############## primary internal subs for alignment comparisons ########################  | 
| 
1321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _run_bootstrap {  | 
| 
1323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ### generates sampled sequences, calculates Ds and Dn values,  | 
| 
1324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ### then calculates variance of sampled sequences and add results to results hash  | 
| 
1325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ###   | 
| 
1326
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
2
 | 
     my ($self,$seq_ref, $results, $bootstrap_rpt) = @_;	  | 
| 
1327
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my @seqs = @$seq_ref;  | 
| 
1328
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my @btstrp_aoa; # to hold array of array of nucleotides for resampling  | 
| 
1329
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     my %bootstrap_values = (ds => [], dn =>[]);	# to hold list of av values   | 
| 
1330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #1st make alternative array of codons;  | 
| 
1332
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
     my $c = 0;  | 
| 
1333
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     while ($c < length $seqs[0]{'seq'}) {  | 
| 
1334
 | 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
 	for (0..$#seqs) {  | 
| 
1335
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
149
 | 
 	    push @{$btstrp_aoa[$_]}, substr ($seqs[$_]{'seq'}, $c, 3);  | 
| 
 
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
575
 | 
    | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1337
 | 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
 	$c+=3;  | 
| 
1338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1340
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     for (1..$bootstrap_rpt) {  | 
| 
1341
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
207
 | 
 	my $sampled = _resample (\@btstrp_aoa);  | 
| 
1342
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
219
 | 
 	my ($ds, $dn) = $self->_get_av_ds_dn ($sampled) ; # is array ref  | 
| 
1343
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
129
 | 
 	push @{$bootstrap_values{'ds'}}, $ds;  | 
| 
 
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
183
 | 
    | 
| 
1344
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
109
 | 
 	push @{$bootstrap_values{'dn'}}, $dn;  | 
| 
 
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
319
 | 
    | 
| 
1345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }	  | 
| 
1346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1347
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $results->{'D_s_var'} = sampling_variance($bootstrap_values{'ds'});  | 
| 
1348
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     $results->{'D_n_var'} = sampling_variance($bootstrap_values{'dn'});  | 
| 
1349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $results->{'z_score'} = 	($results->{'D_n'} - $results->{'D_s'}) /   | 
| 
1350
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	sqrt($results->{'D_s_var'} + $results->{'D_n_var'} );   | 
| 
1351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #print "bootstrapped var_syn = 	$results->{'D_s_var'} \n" ;  | 
| 
1352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #print "bootstrapped var_nc = 	$results->{'D_n_var'} \n";   | 
| 
1353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #print "z is $results->{'z_score'}\n";	### end of global set up of/perm look up data  | 
| 
1354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _resample {  | 
| 
1357
 | 
100
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
107
 | 
     my $ref = shift;  | 
| 
1358
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
94
 | 
     my $codon_num = scalar (@{$ref->[0]});  | 
| 
 
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
145
 | 
    | 
| 
1359
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
102
 | 
     my @altered;  | 
| 
1360
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
171
 | 
     for (0..$codon_num -1) {	#for each codon  | 
| 
1361
 | 
5700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6348
 | 
 	my $rand = int (rand ($codon_num));  | 
| 
1362
 | 
5700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6685
 | 
 	for (0..$#$ref) {  | 
| 
1363
 | 
17100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14813
 | 
 	    push @{$altered[$_]}, $ref->[$_][$rand];  | 
| 
 
 | 
17100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26744
 | 
    | 
| 
1364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1366
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
134
 | 
     my @stringed = map {join '', @$_}@altered;  | 
| 
 
 | 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1224
 | 
    | 
| 
1367
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
139
 | 
     my @return;  | 
| 
1368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #now out in random name to keep other subs happy  | 
| 
1369
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
155
 | 
     for (@stringed) {  | 
| 
1370
 | 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
664
 | 
 	push @return, {id=>'1', seq=> $_};  | 
| 
1371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1372
 | 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
820
 | 
     return \@return;  | 
| 
1373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _get_av_ds_dn {  | 
| 
1376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # takes array of hashes of sequence strings and ids   #  | 
| 
1377
 | 
103
 | 
 
 | 
 
 | 
  
103
  
 | 
 
 | 
125
 | 
     my $self = shift;  | 
| 
1378
 | 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
129
 | 
     my $seq_ref = shift;  | 
| 
1379
 | 
103
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
190
 | 
     my $result = shift if @_;  | 
| 
1380
 | 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
341
 | 
     my @caller = caller(1);  | 
| 
1381
 | 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1812
 | 
     my @seqarray = @$seq_ref;  | 
| 
1382
 | 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
111
 | 
     my $bootstrap_score_list;  | 
| 
1383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #for a multiple alignment considers all pairwise combinations#  | 
| 
1384
 | 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
225
 | 
     my %dsfor_average = (ds => [], dn => []);   | 
| 
1385
 | 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
203
 | 
     for (my $i = 0; $i < scalar @seqarray; $i++) {  | 
| 
1386
 | 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
589
 | 
 	for (my $j = $i +1; $j
 | 
| 
1387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			print "comparing $i and $j\n";  | 
| 
1388
 | 
307
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
571
 | 
 	    if (length($seqarray[$i]{'seq'}) != length($seqarray[$j]{'seq'})) {  | 
| 
1389
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->warn(" aligned sequences must be of equal length!");  | 
| 
1390
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		next;  | 
| 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1393
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
458
 | 
 	    my $syn_site_count = count_syn_sites($seqarray[$i]{'seq'}, $synsites);  | 
| 
1394
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
504
 | 
 	    my $syn_site_count2 = count_syn_sites($seqarray[$j]{'seq'}, $synsites);  | 
| 
1395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #			print "syn 1 is $syn_site_count , syn2 is $syn_site_count2\n";  | 
| 
1396
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
526
 | 
 	    my ($syn_count, $non_syn_count, $gap_cnt) = analyse_mutations($seqarray[$i]{'seq'}, $seqarray[$j]{'seq'});	  | 
| 
1397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #get averages  | 
| 
1398
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
484
 | 
 	    my $av_s_site = ($syn_site_count + $syn_site_count2)/2;  | 
| 
1399
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
561
 | 
 	    my $av_ns_syn_site = length($seqarray[$i]{'seq'}) - $gap_cnt- $av_s_site ;  | 
| 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #calculate ps and pn  (p54)  | 
| 
1402
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
325
 | 
 	    my $syn_prop = $syn_count / $av_s_site;  | 
| 
1403
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
317
 | 
 	    my $nc_prop = $non_syn_count / $av_ns_syn_site	;  | 
| 
1404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #now use jukes/cantor to calculate D_s and D_n, would alter here if needed a different method  | 
| 
1406
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
564
 | 
 	    my $d_syn = $self->jk($syn_prop);  | 
| 
1407
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
451
 | 
 	    my $d_nc = $self->jk($nc_prop);  | 
| 
1408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #JK calculation must succeed for continuation of calculation  | 
| 
1410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #ret_value = -1 if error  | 
| 
1411
 | 
307
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
830
 | 
 	    next unless $d_nc >=0 && $d_syn >=0;  | 
| 
1412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1414
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
318
 | 
 	    push @{$dsfor_average{'ds'}}, $d_syn;  | 
| 
 
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
538
 | 
    | 
| 
1415
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
298
 | 
 	    push @{$dsfor_average{'dn'}}, $d_nc;  | 
| 
 
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
444
 | 
    | 
| 
1416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #if not doing bootstrap, calculate the pairwise comparisin stats  | 
| 
1418
 | 
307
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1420
 | 
 	    if ($caller[3] =~ /calc_KaKs_pair/ || $caller[3] =~ /calc_all_KaKs_pairs/) {  | 
| 
1419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				#now calculate variances assuming large sample  | 
| 
1420
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 		my $d_syn_var =  jk_var($syn_prop, length($seqarray[$i]{'seq'})  - $gap_cnt );  | 
| 
1421
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		my $d_nc_var =  jk_var($nc_prop, length ($seqarray[$i]{'seq'}) - $gap_cnt);  | 
| 
1422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#now calculate z_value  | 
| 
1423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#print "d_syn_var is  $d_syn_var,and d_nc_var is $d_nc_var\n";  | 
| 
1424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#my $z = ($d_nc - $d_syn) / sqrt($d_syn_var + $d_nc_var);  | 
| 
1425
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 		my $z = ($d_syn_var + $d_nc_var) ?   | 
| 
1426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  ($d_nc - $d_syn) / sqrt($d_syn_var + $d_nc_var) : 0;  | 
| 
1427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#	print "z is $z\n";  | 
| 
1428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		push @$result , {S => $av_s_site, N=>$av_ns_syn_site,  | 
| 
1429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 S_d => $syn_count, N_d =>$non_syn_count,  | 
| 
1430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 P_s => $syn_prop, P_n=>$nc_prop,  | 
| 
1431
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 				 D_s => @{$dsfor_average{'ds'}}[-1],  | 
| 
1432
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
 				 D_n => @{$dsfor_average{'dn'}}[-1],  | 
| 
1433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 D_n_var =>$d_nc_var, D_s_var => $d_syn_var,  | 
| 
1434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 Seq1 => $seqarray[$i]{'id'},  | 
| 
1435
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 				 Seq2 => $seqarray[$j]{'id'},  | 
| 
1436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 z_score => $z,  | 
| 
1437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			     };  | 
| 
1438
 | 
4
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
24
 | 
 		$self->warn (" number of mutations too small to justify normal test for  $seqarray[$i]{'id'} and $seqarray[$j]{'id'}\n- use Fisher's exact, or bootstrap a MSA")  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    if ($syn_count < 10 || $non_syn_count < 10 ) && $self->verbose > -1 ;  | 
| 
1440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }#endif  | 
| 
1441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #warn of failure if no results hashes are present  | 
| 
1445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #will fail if Jukes Cantor has failed for all pairwise combinations  | 
| 
1446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$self->warn("calculation failed!") if scalar @$result ==0;  | 
| 
1447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #return results unless bootstrapping  | 
| 
1449
 | 
103
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
345
 | 
     return $result if $caller[3]=~ /calc_all_KaKs/ || $caller[3] =~ /calc_KaKs_pair/;   | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #else if getting average for bootstrap  | 
| 
1451
 | 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
226
 | 
     return( mean ($dsfor_average{'ds'}),mean ($dsfor_average{'dn'})) ;  | 
| 
1452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub jk {  | 
| 
1456
 | 
614
 | 
 
 | 
 
 | 
  
614
  
 | 
  
0
  
 | 
669
 | 
     my ($self, $p) = @_;  | 
| 
1457
 | 
614
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
826
 | 
     if ($p > 0.75) {  | 
| 
1458
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->warn( " Jukes Cantor won't  work -too divergent!");  | 
| 
1459
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return -1;  | 
| 
1460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1461
 | 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1081
 | 
     return -1 * (3/4) * (log(1 - (4/3) * $p));  | 
| 
1462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #works for large value of n (50?100?)  | 
| 
1465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub jk_var {  | 
| 
1466
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
0
  
 | 
11
 | 
     my ($p, $n) = @_;  | 
| 
1467
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     return (9 * $p * (1 -$p))/(((3 - 4 *$p) **2) * $n);  | 
| 
1468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # compares 2 sequences to find the number of synonymous/non  | 
| 
1472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # synonymous mutations between them  | 
| 
1473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub analyse_mutations {  | 
| 
1475
 | 
307
 | 
 
 | 
 
 | 
  
307
  
 | 
  
0
  
 | 
406
 | 
     my ($seq1, $seq2) = @_;  | 
| 
1476
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1978
 | 
     my %mutator = ( 2=> {0=>[[1,2],  # codon positions to be altered   | 
| 
1477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			     [2,1]], # depend on which is the same  | 
| 
1478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 1=>[[0,2],  | 
| 
1479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			     [2,0]],  | 
| 
1480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 2=>[[0,1],  | 
| 
1481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			     [1,0]],	  | 
| 
1482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     },  | 
| 
1483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    3=> [ [0,1,2],  # all need to be altered   | 
| 
1484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [1,0,2],  | 
| 
1485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [0,2,1],  | 
| 
1486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [1,2,0],  | 
| 
1487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [2,0,1],  | 
| 
1488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  [2,1,0] ],  | 
| 
1489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    );  | 
| 
1490
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
513
 | 
     my $TOTAL   = 0;    # total synonymous changes  | 
| 
1491
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
320
 | 
     my $TOTAL_n = 0;	# total non-synonymous changes  | 
| 
1492
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
289
 | 
     my $gap_cnt = 0;  | 
| 
1493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1494
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
329
 | 
     my %input;  | 
| 
1495
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
304
 | 
     my $seqlen = length($seq1);  | 
| 
1496
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
461
 | 
     for (my $j=0; $j< $seqlen; $j+=3) {  | 
| 
1497
 | 
17499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20785
 | 
 	$input{'cod1'} = substr($seq1, $j,3);  | 
| 
1498
 | 
17499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18369
 | 
 	$input{'cod2'} = substr($seq2, $j,3);  | 
| 
1499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#ignore codon if beeing compared with gaps!   | 
| 
1501
 | 
17499
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
46608
 | 
 	if ($input{'cod1'} =~ /\-/ || $input{'cod2'} =~ /\-/){  | 
| 
1502
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    $gap_cnt += 3; #just increments once if there is a pair of gaps  | 
| 
1503
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    next;  | 
| 
1504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1506
 | 
17499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21958
 | 
 	my ($diff_cnt, $same) = count_diffs(\%input);  | 
| 
1507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#ignore if codons are identical  | 
| 
1509
 | 
17499
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30834
 | 
 	next if $diff_cnt == 0 ;  | 
| 
1510
 | 
4912
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5538
 | 
 	if ($diff_cnt == 1) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1511
 | 
3921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5725
 | 
 	    $TOTAL += $synchanges{$input{'cod1'}}{$input{'cod2'}};  | 
| 
1512
 | 
3921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7172
 | 
 	    $TOTAL_n += 1 - $synchanges{$input{'cod1'}}{$input{'cod2'}};  | 
| 
1513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #print " \nfordiff is 1 , total now $TOTAL, total n now $TOTAL_n\n\n"  | 
| 
1514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($diff_cnt ==2) {  | 
| 
1516
 | 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
920
 | 
 	    my $s_cnt = 0;  | 
| 
1517
 | 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
908
 | 
 	    my $n_cnt = 0;  | 
| 
1518
 | 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
874
 | 
 	    my $tot_muts = 4;  | 
| 
1519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #will stay 4 unless there are stop codons at intervening point  | 
| 
1520
 | 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
863
 | 
 	  OUTER:for my $perm (@{$mutator{'2'}{$same}}) {  | 
| 
 
 | 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1536
 | 
    | 
| 
1521
 | 
1982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2137
 | 
 	      my $altered = $input{'cod1'};  | 
| 
1522
 | 
1982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2056
 | 
 	      my $prev= $altered;  | 
| 
1523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      #		print "$prev -> (", $t[$CODONS->{$altered}], ")";  | 
| 
1524
 | 
1982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2042
 | 
 	      for 	my $mut_i (@$perm) { #index of codon mutated  | 
| 
1525
 | 
3964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4405
 | 
 		  substr($altered, $mut_i,1) = substr($input{'cod2'}, $mut_i, 1);  | 
| 
1526
 | 
3964
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5180
 | 
 		  if ($t[$CODONS->{$altered}] eq '*') {  | 
| 
1527
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		      $tot_muts -=2;  | 
| 
1528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      #print "changes to stop codon!!\n";  | 
| 
1529
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		      next OUTER;  | 
| 
1530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  }  | 
| 
1531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  else {  | 
| 
1532
 | 
3964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4717
 | 
 		      $s_cnt += $synchanges{$prev}{$altered};  | 
| 
1533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      #					print "$altered ->(", $t[$CODONS->{$altered}], ") ";  | 
| 
1534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  }  | 
| 
1535
 | 
3964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4522
 | 
 		  $prev = $altered;  | 
| 
1536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      }  | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      #		print "\n";  | 
| 
1538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
1539
 | 
991
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1525
 | 
 	    if ($tot_muts != 0) {  | 
| 
1540
 | 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1339
 | 
 		$TOTAL += ($s_cnt/($tot_muts/2));  | 
| 
1541
 | 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1827
 | 
 		$TOTAL_n += ($tot_muts - $s_cnt)/ ($tot_muts / 2);  | 
| 
1542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($diff_cnt ==3 ) {  | 
| 
1546
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    my $s_cnt = 0;  | 
| 
1547
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    my $n_cnt = 0;  | 
| 
1548
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	    my $tot_muts = 18;	#potential number  of mutations  | 
| 
1549
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	  OUTER: for my $perm (@{$mutator{'3'}}) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1550
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	      my $altered = $input{'cod1'};  | 
| 
1551
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	      my $prev= $altered;  | 
| 
1552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      #	print "$prev -> (", $t[$CODONS->{$altered}], ")";  | 
| 
1553
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	      for my $mut_i (@$perm) { #index of codon mutated  | 
| 
1554
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		  substr($altered, $mut_i,1) = substr($input{'cod2'}, $mut_i, 1);  | 
| 
1555
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		  if ($t[$CODONS->{$altered}] eq '*') {  | 
| 
1556
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		      $tot_muts -=3;  | 
| 
1557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      #	print "changes to stop codon!!\n";  | 
| 
1558
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		      next OUTER;  | 
| 
1559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  }  | 
| 
1561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  else {  | 
| 
1562
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		      $s_cnt += $synchanges{$prev}{$altered};  | 
| 
1563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      #			print "$altered ->(", $t[$CODONS->{$altered}], ") ";  | 
| 
1564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  }  | 
| 
1565
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		  $prev = $altered;  | 
| 
1566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      }  | 
| 
1567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      #	print "\n";  | 
| 
1568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }#end OUTER loop  | 
| 
1570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      #calculate number of synonymous/non synonymous mutations for that codon  | 
| 
1571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      # and add to total  | 
| 
1572
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	      if ($tot_muts != 0) {  | 
| 
1573
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		  $TOTAL += ($s_cnt / ($tot_muts /3));  | 
| 
1574
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		  $TOTAL_n += 3 - ($s_cnt / ($tot_muts /3));  | 
| 
1575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      }  | 
| 
1576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}			#endif $diffcnt = 3  | 
| 
1577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }				#end of sequencetraversal  | 
| 
1578
 | 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1680
 | 
     return ($TOTAL, $TOTAL_n, $gap_cnt);  | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub count_diffs {  | 
| 
1583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #counts the number of nucleotide differences between 2 codons  | 
| 
1584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # returns this value plus the codon index of which nucleotide is the same when 2  | 
| 
1585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #nucleotides are different. This is so analyse_mutations() knows which nucleotides  | 
| 
1586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # to change.  | 
| 
1587
 | 
17499
 | 
 
 | 
 
 | 
  
17499
  
 | 
  
0
  
 | 
15403
 | 
     my $ref = shift;  | 
| 
1588
 | 
17499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16060
 | 
     my $cnt = 0;  | 
| 
1589
 | 
17499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15474
 | 
     my $same= undef;  | 
| 
1590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #just for 2 differences  | 
| 
1591
 | 
17499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19637
 | 
     for (0..2) {  | 
| 
1592
 | 
52497
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
70389
 | 
 	if (substr($ref->{'cod1'}, $_,1) ne substr($ref->{'cod2'}, $_, 1)){  | 
| 
1593
 | 
5903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6321
 | 
 	    $cnt++;  | 
| 
1594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
1595
 | 
46594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48570
 | 
 	    $same = $_;  | 
| 
1596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1598
 | 
17499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24912
 | 
     return ($cnt, $same);  | 
| 
1599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 get_syn_changes  | 
| 
1602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : get_syn_changes  | 
| 
1604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : Bio::Align::DNAStatitics->get_syn_changes  | 
| 
1605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Generate a hashref of all pairwise combinations of codns  | 
| 
1606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            differing by 1  | 
| 
1607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : Symetic matrix using hashes  | 
| 
1608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            First key is codon  | 
| 
1609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            and each codon points to a hashref of codons  | 
| 
1610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            the values of which describe type of change.  | 
| 
1611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            my $type = $hash{$codon1}->{$codon2};  | 
| 
1612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            values are :  | 
| 
1613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              1   synonymous  | 
| 
1614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              0   non-syn  | 
| 
1615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             -1   either codon is a stop codon  | 
| 
1616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : none  | 
| 
1617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_syn_changes {  | 
| 
1621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #hash of all pairwise combinations of codons differing by 1  | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 1 = syn, 0 = non-syn, -1 = stop  | 
| 
1623
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
6
 | 
     my %results;  | 
| 
1624
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my @codons = _make_codons ();  | 
| 
1625
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
125
 | 
     my $arr_len = scalar @codons;  | 
| 
1626
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     for (my $i = 0; $i < $arr_len -1; $i++) {  | 
| 
1627
 | 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
302
 | 
 	my $cod1 = $codons[$i];  | 
| 
1628
 | 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
330
 | 
 	for (my $j = $i +1; $j < $arr_len; $j++) {  | 
| 
1629
 | 
8064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6724
 | 
 	    my $diff_cnt = 0;  | 
| 
1630
 | 
8064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7668
 | 
 	    for my $pos(0..2) {  | 
| 
1631
 | 
24192
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32696
 | 
 		$diff_cnt++ if substr($cod1, $pos, 1) ne substr($codons[$j], $pos, 1);  | 
| 
1632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1633
 | 
8064
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11895
 | 
 	    next if $diff_cnt !=1;  | 
| 
1634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #synon change  | 
| 
1636
 | 
1152
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2909
 | 
 	    if($t[$CODONS->{$cod1}] eq $t[$CODONS->{$codons[$j]}]) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1637
 | 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
347
 | 
 		$results{$cod1}{$codons[$j]} =1;  | 
| 
1638
 | 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
455
 | 
 		$results{$codons[$j]}{$cod1} = 1;  | 
| 
1639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    #stop codon  | 
| 
1641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    elsif ($t[$CODONS->{$cod1}] eq '*' or $t[$CODONS->{$codons[$j]}] eq '*') {  | 
| 
1642
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
117
 | 
 		$results{$cod1}{$codons[$j]} = -1;  | 
| 
1643
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
151
 | 
 		$results{$codons[$j]}{$cod1} = -1;  | 
| 
1644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    # nc change  | 
| 
1646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    else {  | 
| 
1647
 | 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1029
 | 
 		$results{$cod1}{$codons[$j]} = 0;  | 
| 
1648
 | 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1377
 | 
 		$results{$codons[$j]}{$cod1} = 0;  | 
| 
1649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1652
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
142
 | 
     return %results;  | 
| 
1653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 dnds_pattern_number  | 
| 
1656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : dnds_pattern_number  | 
| 
1658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $patterns = $stats->dnds_pattern_number($alnobj);  | 
| 
1659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: Counts the number of codons with no gaps in the MSA  | 
| 
1660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : Number of codons with no gaps ('patterns' in PAML notation)  | 
| 
1661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : A Bio::Align::AlignI compliant object such as a  | 
| 
1662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             Bio::SimpleAlign object.  | 
| 
1663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub dnds_pattern_number{  | 
| 
1667
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($self, $aln) = @_;  | 
| 
1668
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return ($aln->remove_gaps->length)/3;  | 
| 
1669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub count_syn_sites {  | 
| 
1672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #counts the number of possible synonymous changes for sequence  | 
| 
1673
 | 
615
 | 
 
 | 
 
 | 
  
615
  
 | 
  
0
  
 | 
1123
 | 
     my ($seq, $synsite) = @_;  | 
| 
1674
 | 
615
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
910
 | 
     __PACKAGE__->throw("not integral number of codons") if length($seq) % 3 != 0;  | 
| 
1675
 | 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
588
 | 
     my $S = 0;  | 
| 
1676
 | 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
810
 | 
     for (my $i = 0; $i< length($seq); $i+=3) {  | 
| 
1677
 | 
34998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34394
 | 
 	my $cod = substr($seq, $i, 3);  | 
| 
1678
 | 
34998
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
42249
 | 
 	next if $cod =~ /\-/;	#deal with alignment gaps  | 
| 
1679
 | 
34998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48649
 | 
 	$S +=  $synsite->{$cod}{'s'};  | 
| 
1680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #print "S is $S\n";  | 
| 
1682
 | 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
835
 | 
     return $S;  | 
| 
1683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_syn_sites {  | 
| 
1688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #sub to generate lookup hash for the number of synonymous changes per codon  | 
| 
1689
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
0
  
 | 
10
 | 
     my @nucs = qw(T C A G);  | 
| 
1690
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     my %raw_results;  | 
| 
1691
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     for my $i (@nucs) {  | 
| 
1692
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 	for my $j (@nucs) {  | 
| 
1693
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
 	    for my $k (@nucs) {  | 
| 
1694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# for each possible codon  | 
| 
1695
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
322
 | 
           	my $cod = "$i$j$k";  | 
| 
1696
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
256
 | 
            	my $aa = $t[$CODONS->{$cod}];  | 
| 
1697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		#calculate number of synonymous mutations vs non syn mutations  | 
| 
1698
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
259
 | 
             	for my $i (qw(0 1 2)){  | 
| 
1699
 | 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
675
 | 
 		    my $s = 0;  | 
| 
1700
 | 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
620
 | 
 		    my $n = 3;  | 
| 
1701
 | 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
730
 | 
 		    for my $nuc (qw(A T C G)) {  | 
| 
1702
 | 
3072
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3812
 | 
 			next if substr ($cod, $i,1) eq $nuc;  | 
| 
1703
 | 
2304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1895
 | 
 			my $test = $cod;  | 
| 
1704
 | 
2304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2007
 | 
 			substr($test, $i, 1) = $nuc ;  | 
| 
1705
 | 
2304
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2769
 | 
 			if ($t[$CODONS->{$test}] eq $aa) {  | 
| 
1706
 | 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
457
 | 
 			    $s++;  | 
| 
1707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1708
 | 
2304
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2976
 | 
 			if ($t[$CODONS->{$test}] eq '*') {  | 
| 
1709
 | 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
105
 | 
 			    $n--;  | 
| 
1710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}	  | 
| 
1711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
1712
 | 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1500
 | 
 		    $raw_results{$cod}[$i] = {'s' => $s ,  | 
| 
1713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					      'n' => $n };  | 
| 
1714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    } #end analysis of single codon  | 
| 
1717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } #end analysis of all codons  | 
| 
1719
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my %final_results;  | 
| 
1720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1721
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
95
 | 
     for my $cod (sort keys %raw_results) {  | 
| 
1722
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
233
 | 
     	my $t = 0;  | 
| 
1723
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
190
 | 
     	map{$t += ($_->{'s'} /$_->{'n'})} @{$raw_results{$cod}};  | 
| 
 
 | 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1100
 | 
    | 
| 
 
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
288
 | 
    | 
| 
1724
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
522
 | 
     	$final_results{$cod} = { 's'=>$t, 'n' => 3 -$t};  | 
| 
1725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1726
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
138
 | 
     return \%final_results;  | 
| 
1727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _make_codons {  | 
| 
1730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #makes all codon combinations, returns array of them  | 
| 
1731
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
22
 | 
     my @nucs = qw(T C A G);  | 
| 
1732
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     my @codons;  | 
| 
1733
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     for my $i (@nucs) {  | 
| 
1734
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
         for my $j (@nucs) {  | 
| 
1735
 | 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
117
 | 
             for my $k (@nucs) {  | 
| 
1736
 | 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
665
 | 
             	push @codons, "$i$j$k";  | 
| 
1737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }      | 
| 
1740
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
103
 | 
     return @codons;  | 
| 
1741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_codons {  | 
| 
1744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  #generates codon translation look up table#  | 
| 
1745
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
0
  
 | 
9
 | 
  my $x = 0;  | 
| 
1746
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
  my  $CODONS = {};  | 
| 
1747
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
  for my $codon (_make_codons) {  | 
| 
1748
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
324
 | 
      $CODONS->{$codon} = $x;  | 
| 
1749
 | 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
212
 | 
      $x++;  | 
| 
1750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  }   | 
| 
1751
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
  return $CODONS;  | 
| 
1752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #########stats subs, can go in another module? Here for speed. ###  | 
| 
1755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mean {  | 
| 
1756
 | 
204
 | 
 
 | 
 
 | 
  
204
  
 | 
  
0
  
 | 
232
 | 
     my $ref = shift;  | 
| 
1757
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
209
 | 
     my $el_num = scalar @$ref;  | 
| 
1758
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
179
 | 
     my $tot = 0;  | 
| 
1759
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
295
 | 
     map{$tot += $_}@$ref;  | 
| 
 
 | 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1030
 | 
    | 
| 
1760
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
577
 | 
     return ($tot/$el_num);  | 
| 
1761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub variance {  | 
| 
1764
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
0
  
 | 
2
 | 
     my $ref = shift;  | 
| 
1765
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $mean = mean($ref);  | 
| 
1766
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $sum_of_squares = 0;  | 
| 
1767
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     map{$sum_of_squares += ($_ - $mean) **2}@$ref;  | 
| 
 
 | 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
207
 | 
    | 
| 
1768
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return $sum_of_squares;  | 
| 
1769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sampling_variance {  | 
| 
1772
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
0
  
 | 
2
 | 
     my $ref = shift;  | 
| 
1773
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     return variance($ref) / (scalar @$ref -1);  | 
| 
1774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |