File Coverage

lib/Bio/Roary/GroupStatistics.pm
Criterion Covered Total %
statement 138 144 95.8
branch 14 18 77.7
condition 4 6 66.6
subroutine 23 23 100.0
pod 0 3 0.0
total 179 194 92.2


line stmt bran cond sub pod time code
1             package Bio::Roary::GroupStatistics;
2             $Bio::Roary::GroupStatistics::VERSION = '3.10.2';
3             # ABSTRACT: Add labels to the groups
4              
5              
6 10     10   82799 use Moose;
  10         370065  
  10         89  
7 10     10   66367 use POSIX;
  10         40807  
  10         66  
8 10     10   24278 use Text::CSV;
  10         53502  
  10         435  
9 10     10   72 use File::Basename;
  10         33  
  10         622  
10 10     10   2759 use Bio::SeqIO;
  10         243953  
  10         352  
11 10     10   1567 use Bio::Roary::Exceptions;
  10         28  
  10         224  
12 10     10   1428 use Bio::Roary::AnalyseGroups;
  10         52  
  10         399  
13 10     10   2699 use Bio::Roary::AnnotateGroups;
  10         29  
  10         316  
14 10     10   3459 use Bio::Roary::PresenceAbsenceMatrix;
  10         31  
  10         12562  
15              
16             has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
17             has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
18             has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.csv' );
19             has 'output_rtab_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.Rtab' );
20             has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]');
21             has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
22             has '_text_csv_obj' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__text_csv_obj' );
23             has '_sorted_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__sorted_file_names' );
24             has '_groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
25             has '_files_to_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__files_to_groups' );
26             has '_num_files_in_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__num_files_in_groups' );
27             has '_verbose' => ( is => 'ro', isa => 'Bool', default => 0 );
28              
29              
30             sub _build__output_fh {
31 24     24   100 my ($self) = @_;
32 24 50       1088 open( my $fh, '>', $self->output_filename )
33             or Bio::Roary::Exceptions::CouldntWriteToFile->throw(
34             error => "Couldnt write output file:" . $self->output_filename );
35 24         819 return $fh;
36             }
37              
38             sub _build__text_csv_obj {
39 24     24   121 my ($self) = @_;
40 24         706 return Text::CSV->new( { binary => 1, always_quote => 1, eol => "\r\n" } );
41             }
42              
43             sub fixed_headers {
44 435     435 0 552 my ($self) = @_;
45 435         1291 my @header =
46             ( 'Gene', 'Non-unique Gene name', 'Annotation', 'No. isolates', 'No. sequences', 'Avg sequences per isolate', 'Genome Fragment','Order within Fragment', 'Accessory Fragment','Accessory Order with Fragment', 'QC','Min group size nuc', 'Max group size nuc', 'Avg group size nuc' );
47 435         1260 return \@header;
48             }
49              
50             sub _sample_headers
51             {
52 25     25   95 my ($self) = @_;
53 25         43 my @header;
54 25         44 for my $filename ( @{ $self->_sorted_file_names } ) {
  25         836  
55 76         2949 my $filename_cpy = basename($filename);
56 76         282 $filename_cpy =~ s!\.gff\.proteome\.faa!!;
57 76         194 push( @header, $filename_cpy );
58             }
59 25         139 return \@header;
60             }
61              
62             sub _header {
63 24     24   79 my ($self) = @_;
64 24         60 my @header = @{ $self->fixed_headers };
  24         133  
65 24         78 push( @header, @{$self->_sample_headers});
  24         125  
66 24 100       895 push( @header, 'Inference' ) if ( $self->_verbose );
67 24         802 return \@header;
68             }
69              
70             sub _build__sorted_file_names {
71 25     25   78 my ($self) = @_;
72 25         58 my @sorted_file_names = sort( @{ $self->analyse_groups_obj->fasta_files } );
  25         758  
73 25         749 return \@sorted_file_names;
74             }
75              
76             sub _non_unique_name_for_group {
77 63     63   138 my ( $self, $annotated_group_name ) = @_;
78 63         130 my $duplicate_gene_name = '';
79 63         1614 my $prefix = $self->annotate_groups_obj->_group_default_prefix;
80 63 100       350 if ( $annotated_group_name =~ /$prefix/ ) {
81 51         1354 my $non_unique_name_for_group =
82             $self->annotate_groups_obj->_consensus_gene_name_for_group($annotated_group_name);
83 51 50       299 if ( !( $non_unique_name_for_group =~ /$prefix/ ) ) {
84 0         0 $duplicate_gene_name = $non_unique_name_for_group;
85             }
86             }
87 63         160 return $duplicate_gene_name;
88             }
89              
90             sub _build__groups_to_files {
91 22     22   60 my ($self) = @_;
92 22         42 my %groups_to_files;
93 22         36 for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
  22         661  
94 70         1873 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
95 70         107 my %filenames;
96 70         95 for my $gene_name ( @{$genes} ) {
  70         131  
97 121         3054 my $filename = $self->analyse_groups_obj->_genes_to_file->{$gene_name};
98 121         192 push( @{ $filenames{$filename} }, $gene_name );
  121         471  
99             }
100 70         213 $groups_to_files{$group} = \%filenames;
101             }
102            
103 22         615 return \%groups_to_files;
104             }
105              
106             sub _build__files_to_groups
107             {
108 1     1   3 my ($self) = @_;
109 1         1 my %files_to_groups;
110            
111 1         2 for my $group (keys %{$self->_groups_to_files})
  1         21  
112             {
113 7         7 for my $filename (keys %{$self->_groups_to_files->{$group}})
  7         127  
114             {
115 12         14 push(@{$files_to_groups{$filename}}, $group);
  12         40  
116             }
117             }
118            
119 1         19 return \%files_to_groups;
120             }
121              
122             sub _build__num_files_in_groups
123             {
124 24     24   61 my ($self) = @_;
125 24         59 my %num_files_in_groups;
126 24         52 for my $group (@{ $self->annotate_groups_obj->_groups })
  24         807  
127             {
128 63         1677 my $num_files = $self->analyse_groups_obj->_count_num_files_in_group( $self->annotate_groups_obj->_groups_to_id_names->{$group});
129 63         185 $num_files_in_groups{$group} = $num_files;
130             }
131 24         724 return \%num_files_in_groups;
132             }
133              
134             sub _row {
135 63     63   155 my ( $self, $group ) = @_;
136 63         1672 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
137              
138 63         1661 my $num_isolates_in_group = $self->analyse_groups_obj->_count_num_files_in_group($genes);
139 63         123 my $num_sequences_in_group = $#{$genes} + 1;
  63         149  
140 63         441 my $avg_sequences_per_isolate = ceil( ( $num_sequences_in_group / $num_isolates_in_group ) * 100 ) / 100;
141              
142 63         1849 my $annotation = $self->annotate_groups_obj->consensus_product_for_id_names($genes);
143 63         1766 my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
144              
145 63         214 my $duplicate_gene_name = $self->_non_unique_name_for_group($annotated_group_name);
146            
147 63         136 my $genome_number = '';
148 63         119 my $qc_comment = '';
149 63         116 my $order_within_fragement = '';
150 63         92 my $accessory_order_within_fragement = '';
151 63         92 my $accessory_genome_number = '';
152 63 50 66     1889 if(defined($self->groups_to_contigs) && defined($self->groups_to_contigs->{$annotated_group_name}))
153             {
154 0         0 $genome_number = $self->groups_to_contigs->{$annotated_group_name}->{label};
155 0         0 $qc_comment = $self->groups_to_contigs->{$annotated_group_name}->{comment};
156 0         0 $order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{order};
157            
158 0         0 $accessory_genome_number = $self->groups_to_contigs->{$annotated_group_name}->{accessory_label};
159 0         0 $accessory_order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{accessory_order};
160             }
161            
162 63         1578 my $group_size = $self->annotate_groups_obj->group_nucleotide_lengths->{$group};
163            
164             my @row = (
165             $annotated_group_name, $duplicate_gene_name, $annotation,
166             $num_isolates_in_group, $num_sequences_in_group, $avg_sequences_per_isolate,$genome_number,$order_within_fragement,$accessory_genome_number,$accessory_order_within_fragement,$qc_comment,$group_size->{min}, $group_size->{max}, $group_size->{average}
167 63         312 );
168            
169 63         216 for(my $i =0; $i < @row; $i++)
170             {
171 882 100       2148 if(!defined($row[$i]))
172             {
173 135         310 $row[$i] = '';
174             }
175             }
176              
177 63         110 for my $filename ( @{ $self->_sorted_file_names } ) {
  63         1732  
178 196         4996 my $group_to_file_genes = $self->_groups_to_files->{$group}->{$filename};
179              
180 196 100 66     528 if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) {
  109         372  
181              
182 109         169 push( @row, join( "\t", @{$group_to_file_genes} ) );
  109         384  
183 109         237 next;
184             }
185             else {
186 87         290 push( @row, '' );
187             }
188             }
189              
190             ## ADD INFERENCE AND FULL ANNOTATION IF VERBOSE REQUESTED ##
191 63 100       1553 if ( $self->_verbose ){
192 7         16 my ( $full_annotation, $inference );
193 7         124 $row[2] = $self->annotate_groups_obj->full_annotation($group);
194 7         137 push( @row, $self->annotate_groups_obj->inference($group) );
195             }
196              
197 63         1080 return \@row;
198             }
199              
200             sub create_rtab
201             {
202 1     1 0 3 my ($self) = @_;
203 1         32 my $presence_absence_matrix_obj = Bio::Roary::PresenceAbsenceMatrix->new(
204             output_filename => $self->output_rtab_filename,
205             annotate_groups_obj => $self->annotate_groups_obj,
206             sorted_file_names => $self->_sorted_file_names,
207             groups_to_files => $self->_groups_to_files,
208             num_files_in_groups => $self->_num_files_in_groups,
209             sample_headers => $self->_sample_headers,
210             );
211 1         7 $presence_absence_matrix_obj->create_matrix_file;
212 1         27 return $self;
213             }
214              
215             sub create_spreadsheet {
216 24     24 0 82 my ($self) = @_;
217              
218 24         977 $self->_text_csv_obj->print( $self->_output_fh, $self->_header );
219              
220 24 50       477 for my $group (sort {$self->_num_files_in_groups->{$b}<=>$self->_num_files_in_groups->{$a} || $a cmp $b} keys %{$self->_num_files_in_groups}){
  67         1609  
  24         897  
221 63         2187 $self->_text_csv_obj->print( $self->_output_fh, $self->_row($group) );
222             }
223 24         1079 close( $self->_output_fh );
224             }
225              
226 10     10   98 no Moose;
  10         23  
  10         68  
227             __PACKAGE__->meta->make_immutable;
228              
229             1;
230              
231             __END__
232              
233             =pod
234              
235             =encoding UTF-8
236              
237             =head1 NAME
238              
239             Bio::Roary::GroupStatistics - Add labels to the groups
240              
241             =head1 VERSION
242              
243             version 3.10.2
244              
245             =head1 SYNOPSIS
246              
247             Add labels to the groups
248             use Bio::Roary::GroupStatistics;
249              
250             my $obj = Bio::Roary::GroupStatistics->new(
251             output_filename => 'group_statitics.csv',
252             annotate_groups_obj => $annotate_groups_obj,
253             analyse_groups_obj => $analyse_groups_obj
254             );
255             $obj->create_spreadsheet;
256              
257             =head1 AUTHOR
258              
259             Andrew J. Page <ap13@sanger.ac.uk>
260              
261             =head1 COPYRIGHT AND LICENSE
262              
263             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
264              
265             This is free software, licensed under:
266              
267             The GNU General Public License, Version 3, June 2007
268              
269             =cut