File Coverage

lib/Bio/Roary/Output/QueryGroups.pm
Criterion Covered Total %
statement 86 86 100.0
branch 7 10 70.0
condition n/a
subroutine 17 17 100.0
pod 0 4 0.0
total 110 117 94.0


line stmt bran cond sub pod time code
1             package Bio::Roary::Output::QueryGroups;
2             $Bio::Roary::Output::QueryGroups::VERSION = '3.11.0';
3             # ABSTRACT: Output the groups of the union of a set of input isolates
4              
5              
6 3     3   451049 use Moose;
  3         12  
  3         20  
7 3     3   17201 use Bio::SeqIO;
  3         51531  
  3         75  
8 3     3   248 use Bio::Roary::Exceptions;
  3         3  
  3         94  
9 3     3   216 use Bio::Roary::AnalyseGroups;
  3         7  
  3         83  
10 3     3   1114 use POSIX;
  3         13711  
  3         14  
11              
12             has 'analyse_groups' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
13             has 'input_filenames' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
14             has 'output_union_filename' => ( is => 'ro', isa => 'Str', default => 'union_of_groups.gg' );
15             has 'output_intersection_filename' => ( is => 'ro', isa => 'Str', default => 'intersection_of_groups.gg' );
16             has 'output_complement_filename' => ( is => 'ro', isa => 'Str', default => 'complement_of_groups.gg' );
17             has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
18              
19             has '_groups_freq' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_freq' );
20             has '_groups_intersection' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_intersection' );
21             has '_groups_complement' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_complement' );
22             has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups' );
23             has '_number_of_isolates' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_builder__number_of_isolates' );
24             has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Int', lazy_build => 1 );
25              
26             sub _build__min_no_isolates_for_core {
27 5     5   13 my ( $self ) = @_;
28 5         144 my $threshold = ceil( $self->_number_of_isolates * $self->core_definition );
29              
30 5         112 return $threshold;
31             }
32              
33             sub _builder__number_of_isolates {
34 5     5   16 my ($self) = @_;
35 5         11 return @{ $self->input_filenames };
  5         108  
36             }
37              
38             sub _build__groups_freq {
39 31     31   79 my ($self) = @_;
40 31         50 my %groups_freq;
41              
42 31         43 for my $filename ( @{ $self->input_filenames } ) {
  31         973  
43 68         1631 my $genes = $self->analyse_groups->_files_to_genes->{$filename};
44            
45 68         133 my %file_groups_seen;
46 68         89 for my $gene ( @{$genes} ) {
  68         145  
47 249 50       475 next if(!defined($gene));
48 249 50       5522 next if(!defined($self->analyse_groups->_genes_to_groups->{$gene}));
49 249 100       5416 next if(defined($file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}}));
50            
51 246         339 push(@{$groups_freq{ $self->analyse_groups->_genes_to_groups->{$gene} }}, $gene);
  246         5392  
52 246         5709 $file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}} = 1;
53             }
54             }
55              
56 31         819 return \%groups_freq;
57             }
58              
59             sub _build__groups {
60 31     31   84 my ($self) = @_;
61 31         54 my %groups_freq = %{ $self->_groups_freq };
  31         888  
62 31         319 my @groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } keys %groups_freq;
  270         330  
  270         395  
  270         535  
63 31         913 return \@groups;
64             }
65              
66             sub _build__groups_intersection {
67 5     5   17 my ($self) = @_;
68 5         10 my @groups_intersection;
69              
70 5         9 for my $group ( @{$self->_groups} ) {
  5         144  
71 34 100       174 if ( scalar @{$self->_groups_freq->{$group}} >= $self->_min_no_isolates_for_core ) {
  34         685  
72 9         25 push( @groups_intersection, $group );
73             }
74             }
75 5         139 return \@groups_intersection;
76             }
77              
78             sub _build__groups_complement {
79 3     3   8 my ($self) = @_;
80 3         353 my %groups_intersection = map { $_ => 1 } @{ $self->_groups_intersection };
  6         17  
  3         72  
81 3         10 my @complement = grep { not $groups_intersection{$_} } @{ $self->_groups };
  21         34  
  3         66  
82 3         65 return \@complement;
83             }
84              
85             sub _print_out_groups {
86 34     34   85 my ( $self, $filename, $groups ) = @_;
87 34 50       2855 open( my $fh, '>', $filename )
88             or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Couldnt write to file: ' . $filename );
89              
90 34         92 my %groups_freq = %{ $self->_groups_freq };
  34         1066  
91 34         100 my @sorted_groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } @{$groups};
  99         123  
  99         132  
  99         187  
  34         111  
92              
93 34         125 for my $group ( @sorted_groups ) {
94 92         133 print {$fh} $group.': '.join("\t",@{$self->_groups_freq->{$group}}) . "\n";
  92         165  
  92         2134  
95             }
96 34         1695 close($fh);
97 34         469 return $self;
98             }
99              
100             sub groups_complement {
101 3     3 0 8 my ($self) = @_;
102 3         92 $self->_print_out_groups( $self->output_complement_filename, $self->_groups_complement );
103             }
104              
105             sub groups_intersection {
106 4     4 0 14 my ($self) = @_;
107 4         155 $self->_print_out_groups( $self->output_intersection_filename, $self->_groups_intersection );
108             }
109              
110             sub groups_union {
111 3     3 0 15 my ($self) = @_;
112 3         116 $self->_print_out_groups( $self->output_union_filename, $self->_groups );
113             }
114              
115             sub groups_with_external_inputs
116             {
117 24     24 0 53 my ($self, $output_filename,$groups) = @_;
118 24         91 $self->_print_out_groups( $output_filename, $groups );
119            
120             }
121              
122 3     3   8402 no Moose;
  3         6  
  3         20  
123             __PACKAGE__->meta->make_immutable;
124              
125             1;
126              
127             __END__
128              
129             =pod
130              
131             =encoding UTF-8
132              
133             =head1 NAME
134              
135             Bio::Roary::Output::QueryGroups - Output the groups of the union of a set of input isolates
136              
137             =head1 VERSION
138              
139             version 3.11.0
140              
141             =head1 SYNOPSIS
142              
143             Output the groups of the union of a set of input isolates
144             use Bio::Roary::Output::QueryGroups;
145              
146             my $obj = Bio::Roary::Output::QueryGroups->new(
147             analyse_groups => $analyse_groups
148             );
149             $obj->groups_union();
150             $obj->groups_intersection();
151             $obj->groups_complement();
152              
153             =head1 AUTHOR
154              
155             Andrew J. Page <ap13@sanger.ac.uk>
156              
157             =head1 COPYRIGHT AND LICENSE
158              
159             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
160              
161             This is free software, licensed under:
162              
163             The GNU General Public License, Version 3, June 2007
164              
165             =cut