File Coverage

lib/Bio/Roary/Output/NumberOfGroups.pm
Criterion Covered Total %
statement 76 76 100.0
branch 6 6 100.0
condition n/a
subroutine 9 9 100.0
pod 0 1 0.0
total 91 92 98.9


line stmt bran cond sub pod time code
1             package Bio::Roary::Output::NumberOfGroups;
2             $Bio::Roary::Output::NumberOfGroups::VERSION = '3.11.0';
3             # ABSTRACT: Create raw output files of group counts for turning into plots
4              
5              
6 2     2   652 use Moose;
  2         3  
  2         13  
7 2     2   12097 use List::Util qw(shuffle);
  2         5  
  2         141  
8 2     2   12 use Bio::Roary::AnnotateGroups;
  2         4  
  2         38  
9 2     2   8 use Bio::Roary::GroupStatistics;
  2         4  
  2         1211  
10              
11             has 'group_statistics_obj' => ( is => 'ro', isa => 'Bio::Roary::GroupStatistics', required => 1 );
12             has 'number_of_iterations' => ( is => 'ro', isa => 'Int', default => 10);
13             has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]' );
14             has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
15             has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
16              
17             has 'output_raw_filename_conserved_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_conserved_genes.Rtab' );
18             has 'output_raw_filename_unique_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_unique_genes.Rtab' );
19             has 'output_raw_filename_total_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_genes_in_pan_genome.Rtab' );
20             has 'output_raw_filename_new_genes' => ( is => 'ro', isa => 'Str', default => 'number_of_new_genes.Rtab' );
21             has '_conserved_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
22             has '_unique_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
23             has '_total_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
24             has '_new_genes' => ( is => 'ro', isa => 'ArrayRef', default => sub { [] } );
25              
26             sub create_output_files {
27 2     2 0 8 my ($self) = @_;
28              
29 2         88 for ( my $i = 0 ; $i < $self->number_of_iterations ; $i++ ) {
30 20         50 $self->_single_iteration_gene_expansion;
31             }
32              
33 2         55 $self->_create_raw_output_file( $self->output_raw_filename_conserved_genes, $self->_conserved_genes );
34 2         103 $self->_create_raw_output_file( $self->output_raw_filename_unique_genes, $self->_unique_genes );
35 2         71 $self->_create_raw_output_file( $self->output_raw_filename_total_genes, $self->_total_genes );
36 2         60 $self->_create_raw_output_file( $self->output_raw_filename_new_genes, $self->_new_genes );
37 2         25 return 1;
38             }
39              
40             sub _create_raw_output_file {
41 8     8   17 my ( $self, $filename, $output_data ) = @_;
42 8         473 open( my $fh, '>', $filename );
43 8         17 for my $iterations ( @{$output_data} ) {
  8         21  
44 80         91 print {$fh} join( "\t", @{$iterations} );
  80         97  
  80         221  
45 80         95 print {$fh} "\n";
  80         122  
46             }
47 8         251 close($fh);
48             }
49              
50             sub _shuffle_input_files {
51 20     20   28 my ($self) = @_;
52 20         21 my @shuffled_input_files = shuffle( @{ $self->group_statistics_obj->_sorted_file_names } );
  20         502  
53 20         47 return \@shuffled_input_files;
54             }
55              
56             sub _single_iteration_gene_expansion {
57 20     20   37 my ($self) = @_;
58 20         62 my %existing_groups;
59             my @conserved_genes_added_per_file;
60 20         0 my @unique_genes_added_per_file;
61 20         0 my @total_genes_added_per_file;
62 20         0 my @new_genes_added_per_file;
63              
64 20         35 my $shuffled_input_files = $self->_shuffle_input_files();
65              
66 20         27 my $files_counter = 1;
67 20         21 for my $input_file ( @{$shuffled_input_files} ) {
  20         37  
68 60         83 my $unique_groups_counter = 0;
69 60         66 my $total_groups_counter = 0;
70 60         75 my $new_group_counter = 0;
71 60         55 my $conserved_groups_counter = 0;
72 60         1597 my $new_groups = $self->group_statistics_obj->_files_to_groups->{$input_file};
73              
74 60         94 for my $group ( @{$new_groups} ) {
  60         89  
75 240 100       340 if ( !defined( $existing_groups{$group} ) ) {
76 140         142 $new_group_counter++;
77             }
78 240         322 $existing_groups{$group}++;
79             }
80              
81 60         163 for my $group ( keys %existing_groups ) {
82 340 100       7804 if ( $existing_groups{$group} >= ($files_counter*$self->core_definition) ) {
83 170         212 $conserved_groups_counter++;
84             }
85              
86 340 100       571 if ( $existing_groups{$group} == 1 ) {
87 220         244 $unique_groups_counter++;
88             }
89 340         410 $total_groups_counter++;
90             }
91              
92 60         121 push( @conserved_genes_added_per_file, $conserved_groups_counter );
93 60         71 push( @unique_genes_added_per_file, $unique_groups_counter );
94 60         76 push( @total_genes_added_per_file, $total_groups_counter );
95 60         68 push( @new_genes_added_per_file, $new_group_counter );
96 60         92 $files_counter++;
97             }
98 20         26 push( @{ $self->_conserved_genes }, \@conserved_genes_added_per_file );
  20         474  
99 20         29 push( @{ $self->_unique_genes }, \@unique_genes_added_per_file );
  20         615  
100 20         28 push( @{ $self->_total_genes }, \@total_genes_added_per_file );
  20         438  
101 20         30 push( @{ $self->_new_genes }, \@new_genes_added_per_file );
  20         422  
102              
103 20         640 return;
104             }
105              
106 2     2   14 no Moose;
  2         4  
  2         10  
107             __PACKAGE__->meta->make_immutable;
108              
109             1;
110              
111             __END__
112              
113             =pod
114              
115             =encoding UTF-8
116              
117             =head1 NAME
118              
119             Bio::Roary::Output::NumberOfGroups - Create raw output files of group counts for turning into plots
120              
121             =head1 VERSION
122              
123             version 3.11.0
124              
125             =head1 SYNOPSIS
126              
127             # ABSTRACT: Create raw output files of group counts for turning into plots
128             use Bio::Roary::Output::NumberOfGroups;
129              
130             my $obj = Bio::Roary::Output::NumberOfGroups->new(
131             group_statistics_obj => $group_stats, annotate_groups_obj => $annotate_groups
132             );
133             $obj->create_output_files();
134              
135             =head1 AUTHOR
136              
137             Andrew J. Page <ap13@sanger.ac.uk>
138              
139             =head1 COPYRIGHT AND LICENSE
140              
141             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
142              
143             This is free software, licensed under:
144              
145             The GNU General Public License, Version 3, June 2007
146              
147             =cut