File Coverage

blib/lib/Bio/MUST/Core/PostPred/Composition.pm
Criterion Covered Total %
statement 46 46 100.0
branch 2 2 100.0
condition 2 2 100.0
subroutine 8 8 100.0
pod n/a
total 58 58 100.0


line stmt bran cond sub pod time code
1             package Bio::MUST::Core::PostPred::Composition;
2             # ABSTRACT: Posterior predictive test for compositional bias
3             $Bio::MUST::Core::PostPred::Composition::VERSION = '0.212670';
4 17     17   12273 use Moose;
  17         51  
  17         194  
5 17     17   125431 use namespace::autoclean;
  17         48  
  17         154  
6              
7 17     17   1500 use autodie;
  17         43  
  17         142  
8 17     17   94526 use feature qw(say);
  17         62  
  17         1746  
9              
10             # use Smart::Comments;
11              
12 17     17   147 use List::AllUtils qw(sum);
  17         48  
  17         1401  
13 17     17   133 use Tie::IxHash;
  17         79  
  17         609  
14              
15 17     17   118 use Bio::MUST::Core::Types;
  17         54  
  17         8936  
16              
17              
18             has 'seqs' => (
19             is => 'ro',
20             isa => 'Bio::MUST::Core::Ali',
21             required => 1,
22             coerce => 1,
23             handles => [
24             qw(gapmiss_regex all_seqs)
25             ],
26             );
27              
28             # TODO: consider a role if more tests are implemented
29              
30             # private hash containing compositional biases
31             # Note: this hash is actually a Tie::IxHash (see builder)
32             has '_stat_for' => (
33             traits => ['Hash'],
34             is => 'ro',
35             isa => 'HashRef[Num]',
36             init_arg => undef,
37             lazy => 1,
38             builder => '_build_stat_for',
39             handles => {
40             all_ids => 'keys',
41             stat_for => 'get',
42             },
43             );
44              
45             ## no critic (ProhibitUnusedPrivateSubroutines)
46              
47             sub _build_stat_for {
48 51     51   112 my $self = shift;
49              
50 51         96 my %glb_freq_for;
51 51         297 tie my %seq_freq_for, 'Tie::IxHash';
52              
53 51         1274 my $regex = $self->gapmiss_regex;
54 51         183 my $glb_tot = 0;
55              
56             # loop through seqs to store state freqs
57 51         290 for my $seq ($self->all_seqs) {
58 4029         66249 my %freq_for;
59 4029         6099 my $seq_tot = 0;
60              
61             STATE:
62 4029         11681 for my $state ($seq->all_states) {
63 745365         1029186 $state = uc $state;
64              
65             # skip missing/gap states
66             # Note: This is different from what we do in SeqMask::Profiles
67             # so as to avoid these states to decrease regular state freqs
68 745365 100       1761686 next STATE if $state =~ m/$regex/xms;
69              
70             # store state occurrences both for current seq and globally
71 744035         1035143 $glb_freq_for{$state}++;
72 744035         973517 $freq_for{$state}++;
73 744035         921950 $glb_tot++;
74 744035         1018795 $seq_tot++;
75             }
76              
77             # convert occurrences to freqs for current seq
78 4029         81206 $freq_for{$_} /= $seq_tot for keys %freq_for;
79              
80             # store freqs for current seq
81 4029         19997 $seq_freq_for{ $seq->full_id } = \%freq_for;
82             }
83              
84             # convert global occurrences to freqs
85 51         1850 $glb_freq_for{$_} /= $glb_tot for keys %glb_freq_for;
86              
87 51         467 tie my %bias_for, 'Tie::IxHash';
88              
89             # compute bias for each seq
90             # according to Blanquart and Lartillot 2008
91 51         1348 for my $id (keys %seq_freq_for) {
92 4029         93770 while (my ($aa, $freq) = each %glb_freq_for) {
93 79237   100     1571482 $bias_for{$id} += ( ($seq_freq_for{$id}{$aa} // 0) - $freq ) ** 2;
94             }
95             }
96              
97             # compute global biases
98 51         2657 $bias_for{'GLOBALMAX'} = List::AllUtils::max( values %bias_for );
99 51         29274 $bias_for{'GLOBALMEAN'} = sum( values %bias_for ) / keys %bias_for;
100              
101 51         50695 return \%bias_for;
102             }
103              
104             ## use critic
105              
106             __PACKAGE__->meta->make_immutable;
107             1;
108              
109             __END__
110              
111             =pod
112              
113             =head1 NAME
114              
115             Bio::MUST::Core::PostPred::Composition - Posterior predictive test for compositional bias
116              
117             =head1 VERSION
118              
119             version 0.212670
120              
121             =head1 SYNOPSIS
122              
123             # TODO
124              
125             =head1 DESCRIPTION
126              
127             # TODO
128              
129             =head1 AUTHOR
130              
131             Denis BAURAIN <denis.baurain@uliege.be>
132              
133             =head1 COPYRIGHT AND LICENSE
134              
135             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
136              
137             This is free software; you can redistribute it and/or modify it under
138             the same terms as the Perl 5 programming language system itself.
139              
140             =cut