File Coverage

lib/Bio/Roary/Output/QueryGroups.pm
Criterion Covered Total %
statement 86 86 100.0
branch 7 10 70.0
condition n/a
subroutine 17 17 100.0
pod 0 4 0.0
total 110 117 94.0


line stmt bran cond sub pod time code
1             package Bio::Roary::Output::QueryGroups;
2             $Bio::Roary::Output::QueryGroups::VERSION = '3.10.2';
3             # ABSTRACT: Output the groups of the union of a set of input isolates
4              
5              
6 3     3   430916 use Moose;
  3         5  
  3         20  
7 3     3   18304 use Bio::SeqIO;
  3         49507  
  3         93  
8 3     3   239 use Bio::Roary::Exceptions;
  3         6  
  3         77  
9 3     3   229 use Bio::Roary::AnalyseGroups;
  3         7  
  3         91  
10 3     3   1203 use POSIX;
  3         15513  
  3         13  
11              
12             has 'analyse_groups' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
13             has 'input_filenames' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
14             has 'output_union_filename' => ( is => 'ro', isa => 'Str', default => 'union_of_groups.gg' );
15             has 'output_intersection_filename' => ( is => 'ro', isa => 'Str', default => 'intersection_of_groups.gg' );
16             has 'output_complement_filename' => ( is => 'ro', isa => 'Str', default => 'complement_of_groups.gg' );
17             has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
18              
19             has '_groups_freq' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_freq' );
20             has '_groups_intersection' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_intersection' );
21             has '_groups_complement' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_complement' );
22             has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups' );
23             has '_number_of_isolates' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_builder__number_of_isolates' );
24             has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Int', lazy_build => 1 );
25              
26             sub _build__min_no_isolates_for_core {
27 5     5   14 my ( $self ) = @_;
28 5         131 my $threshold = ceil( $self->_number_of_isolates * $self->core_definition );
29              
30 5         248 return $threshold;
31             }
32              
33             sub _builder__number_of_isolates {
34 5     5   344 my ($self) = @_;
35 5         14 return @{ $self->input_filenames };
  5         129  
36             }
37              
38             sub _build__groups_freq {
39 31     31   74 my ($self) = @_;
40 31         52 my %groups_freq;
41              
42 31         46 for my $filename ( @{ $self->input_filenames } ) {
  31         849  
43 68         1702 my $genes = $self->analyse_groups->_files_to_genes->{$filename};
44            
45 68         113 my %file_groups_seen;
46 68         89 for my $gene ( @{$genes} ) {
  68         150  
47 249 50       515 next if(!defined($gene));
48 249 50       5937 next if(!defined($self->analyse_groups->_genes_to_groups->{$gene}));
49 249 100       5738 next if(defined($file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}}));
50            
51 246         347 push(@{$groups_freq{ $self->analyse_groups->_genes_to_groups->{$gene} }}, $gene);
  246         5726  
52 246         5879 $file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}} = 1;
53             }
54             }
55              
56 31         802 return \%groups_freq;
57             }
58              
59             sub _build__groups {
60 31     31   79 my ($self) = @_;
61 31         47 my %groups_freq = %{ $self->_groups_freq };
  31         796  
62 31         266 my @groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } keys %groups_freq;
  256         295  
  256         326  
  256         464  
63 31         928 return \@groups;
64             }
65              
66             sub _build__groups_intersection {
67 5     5   12 my ($self) = @_;
68 5         9 my @groups_intersection;
69              
70 5         34 for my $group ( @{$self->_groups} ) {
  5         115  
71 34 100       90 if ( scalar @{$self->_groups_freq->{$group}} >= $self->_min_no_isolates_for_core ) {
  34         666  
72 9         25 push( @groups_intersection, $group );
73             }
74             }
75 5         118 return \@groups_intersection;
76             }
77              
78             sub _build__groups_complement {
79 3     3   8 my ($self) = @_;
80 3         8 my %groups_intersection = map { $_ => 1 } @{ $self->_groups_intersection };
  6         16  
  3         62  
81 3         5 my @complement = grep { not $groups_intersection{$_} } @{ $self->_groups };
  21         33  
  3         66  
82 3         66 return \@complement;
83             }
84              
85             sub _print_out_groups {
86 34     34   71 my ( $self, $filename, $groups ) = @_;
87 34 50       2712 open( my $fh, '>', $filename )
88             or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Couldnt write to file: ' . $filename );
89              
90 34         96 my %groups_freq = %{ $self->_groups_freq };
  34         1106  
91 34         88 my @sorted_groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } @{$groups};
  99         137  
  99         121  
  99         191  
  34         175  
92              
93 34         97 for my $group ( @sorted_groups ) {
94 92         122 print {$fh} $group.': '.join("\t",@{$self->_groups_freq->{$group}}) . "\n";
  92         192  
  92         2121  
95             }
96 34         1626 close($fh);
97 34         468 return $self;
98             }
99              
100             sub groups_complement {
101 3     3 0 8 my ($self) = @_;
102 3         102 $self->_print_out_groups( $self->output_complement_filename, $self->_groups_complement );
103             }
104              
105             sub groups_intersection {
106 4     4 0 14 my ($self) = @_;
107 4         145 $self->_print_out_groups( $self->output_intersection_filename, $self->_groups_intersection );
108             }
109              
110             sub groups_union {
111 3     3 0 15 my ($self) = @_;
112 3         104 $self->_print_out_groups( $self->output_union_filename, $self->_groups );
113             }
114              
115             sub groups_with_external_inputs
116             {
117 24     24 0 73 my ($self, $output_filename,$groups) = @_;
118 24         75 $self->_print_out_groups( $output_filename, $groups );
119            
120             }
121              
122 3     3   8671 no Moose;
  3         6  
  3         33  
123             __PACKAGE__->meta->make_immutable;
124              
125             1;
126              
127             __END__
128              
129             =pod
130              
131             =encoding UTF-8
132              
133             =head1 NAME
134              
135             Bio::Roary::Output::QueryGroups - Output the groups of the union of a set of input isolates
136              
137             =head1 VERSION
138              
139             version 3.10.2
140              
141             =head1 SYNOPSIS
142              
143             Output the groups of the union of a set of input isolates
144             use Bio::Roary::Output::QueryGroups;
145              
146             my $obj = Bio::Roary::Output::QueryGroups->new(
147             analyse_groups => $analyse_groups
148             );
149             $obj->groups_union();
150             $obj->groups_intersection();
151             $obj->groups_complement();
152              
153             =head1 AUTHOR
154              
155             Andrew J. Page <ap13@sanger.ac.uk>
156              
157             =head1 COPYRIGHT AND LICENSE
158              
159             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
160              
161             This is free software, licensed under:
162              
163             The GNU General Public License, Version 3, June 2007
164              
165             =cut