File Coverage

lib/Bio/Roary/Output/NumberOfGroups.pm
Criterion Covered Total %
statement 76 76 100.0
branch 6 6 100.0
condition n/a
subroutine 9 9 100.0
pod 0 1 0.0
total 91 92 98.9


line stmt bran cond sub pod time code
1             package Bio::Roary::Output::NumberOfGroups;
2             $Bio::Roary::Output::NumberOfGroups::VERSION = '3.10.2';
3             # ABSTRACT: Create raw output files of group counts for turning into plots
4              
5              
6 2     2   529 use Moose;
  2         4  
  2         16  
7 2     2   11574 use List::Util qw(shuffle);
  2         3  
  2         139  
8 2     2   11 use Bio::Roary::AnnotateGroups;
  2         5  
  2         37  
9 2     2   8 use Bio::Roary::GroupStatistics;
  2         3  
  2         1131  
10              
11             has 'group_statistics_obj' => ( is => 'ro', isa => 'Bio::Roary::GroupStatistics', required => 1 );
12             has 'number_of_iterations' => ( is => 'ro', isa => 'Int', default => 10);
13             has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]' );
14             has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
15             has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
16              
17             has 'output_raw_filename_conserved_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_conserved_genes.Rtab' );
18             has 'output_raw_filename_unique_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_unique_genes.Rtab' );
19             has 'output_raw_filename_total_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_genes_in_pan_genome.Rtab' );
20             has 'output_raw_filename_new_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_new_genes.Rtab' );
21             has '_conserved_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
22             has '_unique_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
23             has '_total_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
24             has '_new_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
25              
26             sub create_output_files {
27 2     2 0 6 my ($self) = @_;
28              
29 2         79 for ( my $i = 0 ; $i < $self->number_of_iterations ; $i++ ) {
30 20         34 $self->_single_iteration_gene_expansion;
31             }
32              
33 2         44 $self->_create_raw_output_file( $self->output_raw_filename_conserved_genes, $self->_conserved_genes );
34 2         167 $self->_create_raw_output_file( $self->output_raw_filename_unique_genes, $self->_unique_genes );
35 2         58 $self->_create_raw_output_file( $self->output_raw_filename_total_genes, $self->_total_genes );
36 2         60 $self->_create_raw_output_file( $self->output_raw_filename_new_genes, $self->_new_genes );
37 2         11 return 1;
38             }
39              
40             sub _create_raw_output_file {
41 8     8   14 my ( $self, $filename, $output_data ) = @_;
42 8         333 open( my $fh, '>', $filename );
43 8         18 for my $iterations ( @{$output_data} ) {
  8         15  
44 80         73 print {$fh} join( "\t", @{$iterations} );
  80         71  
  80         169  
45 80         78 print {$fh} "\n";
  80         95  
46             }
47 8         171 close($fh);
48             }
49              
50             sub _shuffle_input_files {
51 20     20   25 my ($self) = @_;
52 20         20 my @shuffled_input_files = shuffle( @{ $self->group_statistics_obj->_sorted_file_names } );
  20         425  
53 20         40 return \@shuffled_input_files;
54             }
55              
56             sub _single_iteration_gene_expansion {
57 20     20   35 my ($self) = @_;
58 20         54 my %existing_groups;
59             my @conserved_genes_added_per_file;
60 20         0 my @unique_genes_added_per_file;
61 20         0 my @total_genes_added_per_file;
62 20         0 my @new_genes_added_per_file;
63              
64 20         27 my $shuffled_input_files = $self->_shuffle_input_files();
65              
66 20         25 my $files_counter = 1;
67 20         22 for my $input_file ( @{$shuffled_input_files} ) {
  20         32  
68 60         60 my $unique_groups_counter = 0;
69 60         61 my $total_groups_counter = 0;
70 60         58 my $new_group_counter = 0;
71 60         56 my $conserved_groups_counter = 0;
72 60         1290 my $new_groups = $self->group_statistics_obj->_files_to_groups->{$input_file};
73              
74 60         75 for my $group ( @{$new_groups} ) {
  60         75  
75 240 100       302 if ( !defined( $existing_groups{$group} ) ) {
76 140         140 $new_group_counter++;
77             }
78 240         285 $existing_groups{$group}++;
79             }
80              
81 60         119 for my $group ( keys %existing_groups ) {
82 340 100       6778 if ( $existing_groups{$group} >= ($files_counter*$self->core_definition) ) {
83 170         183 $conserved_groups_counter++;
84             }
85              
86 340 100       505 if ( $existing_groups{$group} == 1 ) {
87 220         234 $unique_groups_counter++;
88             }
89 340         380 $total_groups_counter++;
90             }
91              
92 60         94 push( @conserved_genes_added_per_file, $conserved_groups_counter );
93 60         63 push( @unique_genes_added_per_file, $unique_groups_counter );
94 60         58 push( @total_genes_added_per_file, $total_groups_counter );
95 60         66 push( @new_genes_added_per_file, $new_group_counter );
96 60         89 $files_counter++;
97             }
98 20         25 push( @{ $self->_conserved_genes }, \@conserved_genes_added_per_file );
  20         402  
99 20         21 push( @{ $self->_unique_genes }, \@unique_genes_added_per_file );
  20         383  
100 20         24 push( @{ $self->_total_genes }, \@total_genes_added_per_file );
  20         371  
101 20         37 push( @{ $self->_new_genes }, \@new_genes_added_per_file );
  20         384  
102              
103 20         466 return;
104             }
105              
106 2     2   14 no Moose;
  2         7  
  2         10  
107             __PACKAGE__->meta->make_immutable;
108              
109             1;
110              
111             __END__
112              
113             =pod
114              
115             =encoding UTF-8
116              
117             =head1 NAME
118              
119             Bio::Roary::Output::NumberOfGroups - Create raw output files of group counts for turning into plots
120              
121             =head1 VERSION
122              
123             version 3.10.2
124              
125             =head1 SYNOPSIS
126              
127             # ABSTRACT: Create raw output files of group counts for turning into plots
128             use Bio::Roary::Output::NumberOfGroups;
129              
130             my $obj = Bio::Roary::Output::NumberOfGroups->new(
131             group_statistics_obj => $group_stats, annotate_groups_obj => $annotate_groups
132             );
133             $obj->create_output_files();
134              
135             =head1 AUTHOR
136              
137             Andrew J. Page <ap13@sanger.ac.uk>
138              
139             =head1 COPYRIGHT AND LICENSE
140              
141             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
142              
143             This is free software, licensed under:
144              
145             The GNU General Public License, Version 3, June 2007
146              
147             =cut