|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::MUST::Core::SeqMask::Pmsf;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: Posterior mean site frequencies (PMSF) for sequence sites  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Bio::MUST::Core::SeqMask::Pmsf::VERSION = '0.212650';  | 
| 
4
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
132
 | 
 use Moose;  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
    | 
| 
5
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
127712
 | 
 use namespace::autoclean;  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
194
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
1749
 | 
 use autodie;  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
169
 | 
    | 
| 
8
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
95543
 | 
 use feature qw(say);  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1819
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
127
 | 
 use Carp;  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1558
 | 
    | 
| 
11
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
120
 | 
 use List::AllUtils qw(sum each_arrayref);  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1240
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 extends 'Bio::MUST::Core::SeqMask';  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
126
 | 
 use Bio::MUST::Core::Types;  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
615
 | 
    | 
| 
16
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
129
 | 
 use aliased 'Bio::MUST::Core::SeqMask::Rates';  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
    | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # override superclass' Bool type  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Note: mask indices are as follow: [site][AA]  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #       mask values  are freqs  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has '+mask' => (  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     isa => 'ArrayRef[ArrayRef[Num]]',  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # TODO: mask non-applicable methods from superclass? (Liskov principle)  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub chi_square_stats {  | 
| 
31
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
14
 | 
     my $self = shift;  | 
| 
32
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $othr = shift;  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # check that both pmsf objects are the same length  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # potential bugs could come from constant sites etc  | 
| 
36
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     my $s_width = $self->mask_len;  | 
| 
37
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     my $o_width = $othr->mask_len;  | 
| 
38
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     carp "[BMC] Warning: PMSF widths do not match: $s_width vs. $o_width!"  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless $s_width == $o_width;  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my @stats;  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     my $ea = each_arrayref [ $self->all_states ], [ $othr->all_states ];  | 
| 
44
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2342
 | 
     while (my ($s_freqs, $o_freqs) = $ea->() ) {  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         push @stats, 0 + ( sprintf "%.13f", sum map {  | 
| 
46
 | 
34607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
86049
 | 
             ( $o_freqs->[$_] - $s_freqs->[$_] )**2 / $s_freqs->[$_]  | 
| 
 
 | 
692140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2075854
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } 0..$#$o_freqs );  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }   # Note: trick to get identical results across platforms  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # https://stackoverflow.com/questions/21204733/a-better-chi-square-test-for-perl  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
51
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
105
 | 
     return Rates->new( mask => \@stats );  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # I/O methods  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub load {  | 
| 
59
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
596
 | 
     my $class  = shift;  | 
| 
60
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     my $infile = shift;  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     open my $in, '<', $infile;  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2664
 | 
     my $mask = $class->new();  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     LINE:  | 
| 
67
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2190
 | 
     while (my $line = <$in>) {  | 
| 
68
 | 
69214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128809
 | 
         chomp $line;  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # skip empty lines, header line and process comment lines  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #       next LINE if $line =~ $EMPTY_LINE  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #                 || $mask->is_comment($line);  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # split line on whitespace and ignore first value (site number)  | 
| 
75
 | 
69214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
958004
 | 
         my (undef, @fields) = split /\s+/xms, $line;  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # store AA freqs all at once  | 
| 
78
 | 
69214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2879443
 | 
         $mask->add_state( \@fields );  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
     return $mask;  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __PACKAGE__->meta->make_immutable;  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::MUST::Core::SeqMask::Pmsf - Posterior mean site frequencies (PMSF) for sequence sites  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 version 0.212650  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # TODO  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # TODO  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 chi_square_stats  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 load  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Denis BAURAIN <denis.baurain@uliege.be>  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT AND LICENSE  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is free software; you can redistribute it and/or modify it under  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the same terms as the Perl 5 programming language system itself.  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |