File Coverage

lib/Bio/Roary/GroupStatistics.pm
Criterion Covered Total %
statement 138 144 95.8
branch 14 18 77.7
condition 4 6 66.6
subroutine 23 23 100.0
pod 0 3 0.0
total 179 194 92.2


line stmt bran cond sub pod time code
1             package Bio::Roary::GroupStatistics;
2             $Bio::Roary::GroupStatistics::VERSION = '3.11.0';
3             # ABSTRACT: Add labels to the groups
4              
5              
6 10     10   105312 use Moose;
  10         429493  
  10         75  
7 10     10   63461 use POSIX;
  10         42070  
  10         54  
8 10     10   23195 use Text::CSV;
  10         54890  
  10         407  
9 10     10   68 use File::Basename;
  10         28  
  10         605  
10 10     10   2720 use Bio::SeqIO;
  10         255644  
  10         336  
11 10     10   1605 use Bio::Roary::Exceptions;
  10         21  
  10         258  
12 10     10   1398 use Bio::Roary::AnalyseGroups;
  10         36  
  10         430  
13 10     10   2696 use Bio::Roary::AnnotateGroups;
  10         23  
  10         307  
14 10     10   3347 use Bio::Roary::PresenceAbsenceMatrix;
  10         37  
  10         12253  
15              
16             has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
17             has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
18             has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.csv' );
19             has 'output_rtab_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.Rtab' );
20             has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]');
21             has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
22             has '_text_csv_obj' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__text_csv_obj' );
23             has '_sorted_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__sorted_file_names' );
24             has '_groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
25             has '_files_to_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__files_to_groups' );
26             has '_num_files_in_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__num_files_in_groups' );
27             has '_verbose' => ( is => 'ro', isa => 'Bool', default => 0 );
28              
29              
30             sub _build__output_fh {
31 24     24   94 my ($self) = @_;
32 24 50       1002 open( my $fh, '>', $self->output_filename )
33             or Bio::Roary::Exceptions::CouldntWriteToFile->throw(
34             error => "Couldnt write output file:" . $self->output_filename );
35 24         808 return $fh;
36             }
37              
38             sub _build__text_csv_obj {
39 24     24   82 my ($self) = @_;
40 24         679 return Text::CSV->new( { binary => 1, always_quote => 1, eol => "\r\n" } );
41             }
42              
43             sub fixed_headers {
44 435     435 0 597 my ($self) = @_;
45 435         1419 my @header =
46             ( 'Gene', 'Non-unique Gene name', 'Annotation', 'No. isolates', 'No. sequences', 'Avg sequences per isolate', 'Genome Fragment','Order within Fragment', 'Accessory Fragment','Accessory Order with Fragment', 'QC','Min group size nuc', 'Max group size nuc', 'Avg group size nuc' );
47 435         1189 return \@header;
48             }
49              
50             sub _sample_headers
51             {
52 25     25   71 my ($self) = @_;
53 25         48 my @header;
54 25         51 for my $filename ( @{ $self->_sorted_file_names } ) {
  25         811  
55 76         2899 my $filename_cpy = basename($filename);
56 76         265 $filename_cpy =~ s!\.gff\.proteome\.faa!!;
57 76         191 push( @header, $filename_cpy );
58             }
59 25         145 return \@header;
60             }
61              
62             sub _header {
63 24     24   86 my ($self) = @_;
64 24         51 my @header = @{ $self->fixed_headers };
  24         138  
65 24         76 push( @header, @{$self->_sample_headers});
  24         126  
66 24 100       735 push( @header, 'Inference' ) if ( $self->_verbose );
67 24         798 return \@header;
68             }
69              
70             sub _build__sorted_file_names {
71 25     25   60 my ($self) = @_;
72 25         51 my @sorted_file_names = sort( @{ $self->analyse_groups_obj->fasta_files } );
  25         752  
73 25         662 return \@sorted_file_names;
74             }
75              
76             sub _non_unique_name_for_group {
77 63     63   166 my ( $self, $annotated_group_name ) = @_;
78 63         113 my $duplicate_gene_name = '';
79 63         1467 my $prefix = $self->annotate_groups_obj->_group_default_prefix;
80 63 100       311 if ( $annotated_group_name =~ /$prefix/ ) {
81 51         1222 my $non_unique_name_for_group =
82             $self->annotate_groups_obj->_consensus_gene_name_for_group($annotated_group_name);
83 51 50       289 if ( !( $non_unique_name_for_group =~ /$prefix/ ) ) {
84 0         0 $duplicate_gene_name = $non_unique_name_for_group;
85             }
86             }
87 63         145 return $duplicate_gene_name;
88             }
89              
90             sub _build__groups_to_files {
91 22     22   62 my ($self) = @_;
92 22         34 my %groups_to_files;
93 22         37 for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
  22         587  
94 70         1692 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
95 70         125 my %filenames;
96 70         110 for my $gene_name ( @{$genes} ) {
  70         199  
97 121         2763 my $filename = $self->analyse_groups_obj->_genes_to_file->{$gene_name};
98 121         176 push( @{ $filenames{$filename} }, $gene_name );
  121         457  
99             }
100 70         195 $groups_to_files{$group} = \%filenames;
101             }
102            
103 22         591 return \%groups_to_files;
104             }
105              
106             sub _build__files_to_groups
107             {
108 1     1   2 my ($self) = @_;
109 1         2 my %files_to_groups;
110            
111 1         6 for my $group (keys %{$self->_groups_to_files})
  1         27  
112             {
113 7         6 for my $filename (keys %{$self->_groups_to_files->{$group}})
  7         164  
114             {
115 12         19 push(@{$files_to_groups{$filename}}, $group);
  12         28  
116             }
117             }
118            
119 1         20 return \%files_to_groups;
120             }
121              
122             sub _build__num_files_in_groups
123             {
124 24     24   59 my ($self) = @_;
125 24         46 my %num_files_in_groups;
126 24         52 for my $group (@{ $self->annotate_groups_obj->_groups })
  24         712  
127             {
128 63         1659 my $num_files = $self->analyse_groups_obj->_count_num_files_in_group( $self->annotate_groups_obj->_groups_to_id_names->{$group});
129 63         195 $num_files_in_groups{$group} = $num_files;
130             }
131 24         644 return \%num_files_in_groups;
132             }
133              
134             sub _row {
135 63     63   144 my ( $self, $group ) = @_;
136 63         1570 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
137              
138 63         1430 my $num_isolates_in_group = $self->analyse_groups_obj->_count_num_files_in_group($genes);
139 63         100 my $num_sequences_in_group = $#{$genes} + 1;
  63         138  
140 63         441 my $avg_sequences_per_isolate = ceil( ( $num_sequences_in_group / $num_isolates_in_group ) * 100 ) / 100;
141              
142 63         1646 my $annotation = $self->annotate_groups_obj->consensus_product_for_id_names($genes);
143 63         1515 my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
144              
145 63         206 my $duplicate_gene_name = $self->_non_unique_name_for_group($annotated_group_name);
146            
147 63         137 my $genome_number = '';
148 63         121 my $qc_comment = '';
149 63         87 my $order_within_fragement = '';
150 63         144 my $accessory_order_within_fragement = '';
151 63         104 my $accessory_genome_number = '';
152 63 50 66     1641 if(defined($self->groups_to_contigs) && defined($self->groups_to_contigs->{$annotated_group_name}))
153             {
154 0         0 $genome_number = $self->groups_to_contigs->{$annotated_group_name}->{label};
155 0         0 $qc_comment = $self->groups_to_contigs->{$annotated_group_name}->{comment};
156 0         0 $order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{order};
157            
158 0         0 $accessory_genome_number = $self->groups_to_contigs->{$annotated_group_name}->{accessory_label};
159 0         0 $accessory_order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{accessory_order};
160             }
161            
162 63         1544 my $group_size = $self->annotate_groups_obj->group_nucleotide_lengths->{$group};
163            
164             my @row = (
165             $annotated_group_name, $duplicate_gene_name, $annotation,
166             $num_isolates_in_group, $num_sequences_in_group, $avg_sequences_per_isolate,$genome_number,$order_within_fragement,$accessory_genome_number,$accessory_order_within_fragement,$qc_comment,$group_size->{min}, $group_size->{max}, $group_size->{average}
167 63         302 );
168            
169 63         186 for(my $i =0; $i < @row; $i++)
170             {
171 882 100       1652 if(!defined($row[$i]))
172             {
173 135         256 $row[$i] = '';
174             }
175             }
176              
177 63         96 for my $filename ( @{ $self->_sorted_file_names } ) {
  63         1620  
178 196         4545 my $group_to_file_genes = $self->_groups_to_files->{$group}->{$filename};
179              
180 196 100 66     503 if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) {
  109         370  
181              
182 109         156 push( @row, join( "\t", @{$group_to_file_genes} ) );
  109         321  
183 109         243 next;
184             }
185             else {
186 87         266 push( @row, '' );
187             }
188             }
189              
190             ## ADD INFERENCE AND FULL ANNOTATION IF VERBOSE REQUESTED ##
191 63 100       1409 if ( $self->_verbose ){
192 7         11 my ( $full_annotation, $inference );
193 7         132 $row[2] = $self->annotate_groups_obj->full_annotation($group);
194 7         154 push( @row, $self->annotate_groups_obj->inference($group) );
195             }
196              
197 63         1178 return \@row;
198             }
199              
200             sub create_rtab
201             {
202 1     1 0 4 my ($self) = @_;
203 1         33 my $presence_absence_matrix_obj = Bio::Roary::PresenceAbsenceMatrix->new(
204             output_filename => $self->output_rtab_filename,
205             annotate_groups_obj => $self->annotate_groups_obj,
206             sorted_file_names => $self->_sorted_file_names,
207             groups_to_files => $self->_groups_to_files,
208             num_files_in_groups => $self->_num_files_in_groups,
209             sample_headers => $self->_sample_headers,
210             );
211 1         9 $presence_absence_matrix_obj->create_matrix_file;
212 1         29 return $self;
213             }
214              
215             sub create_spreadsheet {
216 24     24 0 87 my ($self) = @_;
217              
218 24         892 $self->_text_csv_obj->print( $self->_output_fh, $self->_header );
219              
220 24 50       412 for my $group (sort {$self->_num_files_in_groups->{$b}<=>$self->_num_files_in_groups->{$a} || $a cmp $b} keys %{$self->_num_files_in_groups}){
  71         1635  
  24         786  
221 63         2036 $self->_text_csv_obj->print( $self->_output_fh, $self->_row($group) );
222             }
223 24         727 close( $self->_output_fh );
224             }
225              
226 10     10   96 no Moose;
  10         35  
  10         79  
227             __PACKAGE__->meta->make_immutable;
228              
229             1;
230              
231             __END__
232              
233             =pod
234              
235             =encoding UTF-8
236              
237             =head1 NAME
238              
239             Bio::Roary::GroupStatistics - Add labels to the groups
240              
241             =head1 VERSION
242              
243             version 3.11.0
244              
245             =head1 SYNOPSIS
246              
247             Add labels to the groups
248             use Bio::Roary::GroupStatistics;
249              
250             my $obj = Bio::Roary::GroupStatistics->new(
251             output_filename => 'group_statitics.csv',
252             annotate_groups_obj => $annotate_groups_obj,
253             analyse_groups_obj => $analyse_groups_obj
254             );
255             $obj->create_spreadsheet;
256              
257             =head1 AUTHOR
258              
259             Andrew J. Page <ap13@sanger.ac.uk>
260              
261             =head1 COPYRIGHT AND LICENSE
262              
263             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
264              
265             This is free software, licensed under:
266              
267             The GNU General Public License, Version 3, June 2007
268              
269             =cut