File Coverage

lib/Bio/Roary/GroupStatistics.pm
Criterion Covered Total %
statement 138 144 95.8
branch 14 18 77.7
condition 4 6 66.6
subroutine 23 23 100.0
pod 0 3 0.0
total 179 194 92.2


line stmt bran cond sub pod time code
1             package Bio::Roary::GroupStatistics;
2             $Bio::Roary::GroupStatistics::VERSION = '3.10.1';
3             # ABSTRACT: Add labels to the groups
4              
5              
6 10     10   101230 use Moose;
  10         463086  
  10         85  
7 10     10   71407 use POSIX;
  10         48806  
  10         68  
8 10     10   23267 use Text::CSV;
  10         53049  
  10         407  
9 10     10   69 use File::Basename;
  10         24  
  10         756  
10 10     10   2810 use Bio::SeqIO;
  10         272096  
  10         328  
11 10     10   1600 use Bio::Roary::Exceptions;
  10         30  
  10         256  
12 10     10   1441 use Bio::Roary::AnalyseGroups;
  10         44  
  10         387  
13 10     10   2910 use Bio::Roary::AnnotateGroups;
  10         31  
  10         369  
14 10     10   3832 use Bio::Roary::PresenceAbsenceMatrix;
  10         36  
  10         12975  
15              
16             has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
17             has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
18             has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.csv' );
19             has 'output_rtab_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.Rtab' );
20             has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]');
21             has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
22             has '_text_csv_obj' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__text_csv_obj' );
23             has '_sorted_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__sorted_file_names' );
24             has '_groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
25             has '_files_to_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__files_to_groups' );
26             has '_num_files_in_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__num_files_in_groups' );
27             has '_verbose' => ( is => 'ro', isa => 'Bool', default => 0 );
28              
29              
30             sub _build__output_fh {
31 24     24   118 my ($self) = @_;
32 24 50       1128 open( my $fh, '>', $self->output_filename )
33             or Bio::Roary::Exceptions::CouldntWriteToFile->throw(
34             error => "Couldnt write output file:" . $self->output_filename );
35 24         906 return $fh;
36             }
37              
38             sub _build__text_csv_obj {
39 24     24   139 my ($self) = @_;
40 24         776 return Text::CSV->new( { binary => 1, always_quote => 1, eol => "\r\n" } );
41             }
42              
43             sub fixed_headers {
44 435     435 0 547 my ($self) = @_;
45 435         1396 my @header =
46             ( 'Gene', 'Non-unique Gene name', 'Annotation', 'No. isolates', 'No. sequences', 'Avg sequences per isolate', 'Genome Fragment','Order within Fragment', 'Accessory Fragment','Accessory Order with Fragment', 'QC','Min group size nuc', 'Max group size nuc', 'Avg group size nuc' );
47 435         1213 return \@header;
48             }
49              
50             sub _sample_headers
51             {
52 25     25   68 my ($self) = @_;
53 25         57 my @header;
54 25         48 for my $filename ( @{ $self->_sorted_file_names } ) {
  25         1028  
55 76         3489 my $filename_cpy = basename($filename);
56 76         311 $filename_cpy =~ s!\.gff\.proteome\.faa!!;
57 76         238 push( @header, $filename_cpy );
58             }
59 25         200 return \@header;
60             }
61              
62             sub _header {
63 24     24   113 my ($self) = @_;
64 24         53 my @header = @{ $self->fixed_headers };
  24         117  
65 24         88 push( @header, @{$self->_sample_headers});
  24         128  
66 24 100       1148 push( @header, 'Inference' ) if ( $self->_verbose );
67 24         1052 return \@header;
68             }
69              
70             sub _build__sorted_file_names {
71 25     25   155 my ($self) = @_;
72 25         60 my @sorted_file_names = sort( @{ $self->analyse_groups_obj->fasta_files } );
  25         960  
73 25         950 return \@sorted_file_names;
74             }
75              
76             sub _non_unique_name_for_group {
77 63     63   205 my ( $self, $annotated_group_name ) = @_;
78 63         158 my $duplicate_gene_name = '';
79 63         1967 my $prefix = $self->annotate_groups_obj->_group_default_prefix;
80 63 100       422 if ( $annotated_group_name =~ /$prefix/ ) {
81 51         1696 my $non_unique_name_for_group =
82             $self->annotate_groups_obj->_consensus_gene_name_for_group($annotated_group_name);
83 51 50       324 if ( !( $non_unique_name_for_group =~ /$prefix/ ) ) {
84 0         0 $duplicate_gene_name = $non_unique_name_for_group;
85             }
86             }
87 63         198 return $duplicate_gene_name;
88             }
89              
90             sub _build__groups_to_files {
91 22     22   66 my ($self) = @_;
92 22         54 my %groups_to_files;
93 22         48 for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
  22         749  
94 70         2386 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
95 70         146 my %filenames;
96 70         119 for my $gene_name ( @{$genes} ) {
  70         170  
97 121         4053 my $filename = $self->analyse_groups_obj->_genes_to_file->{$gene_name};
98 121         237 push( @{ $filenames{$filename} }, $gene_name );
  121         606  
99             }
100 70         411 $groups_to_files{$group} = \%filenames;
101             }
102            
103 22         790 return \%groups_to_files;
104             }
105              
106             sub _build__files_to_groups
107             {
108 1     1   4 my ($self) = @_;
109 1         3 my %files_to_groups;
110            
111 1         2 for my $group (keys %{$self->_groups_to_files})
  1         41  
112             {
113 7         13 for my $filename (keys %{$self->_groups_to_files->{$group}})
  7         213  
114             {
115 12         25 push(@{$files_to_groups{$filename}}, $group);
  12         34  
116             }
117             }
118            
119 1         38 return \%files_to_groups;
120             }
121              
122             sub _build__num_files_in_groups
123             {
124 24     24   69 my ($self) = @_;
125 24         61 my %num_files_in_groups;
126 24         59 for my $group (@{ $self->annotate_groups_obj->_groups })
  24         1004  
127             {
128 63         2221 my $num_files = $self->analyse_groups_obj->_count_num_files_in_group( $self->annotate_groups_obj->_groups_to_id_names->{$group});
129 63         253 $num_files_in_groups{$group} = $num_files;
130             }
131 24         901 return \%num_files_in_groups;
132             }
133              
134             sub _row {
135 63     63   174 my ( $self, $group ) = @_;
136 63         1877 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
137              
138 63         1943 my $num_isolates_in_group = $self->analyse_groups_obj->_count_num_files_in_group($genes);
139 63         137 my $num_sequences_in_group = $#{$genes} + 1;
  63         158  
140 63         500 my $avg_sequences_per_isolate = ceil( ( $num_sequences_in_group / $num_isolates_in_group ) * 100 ) / 100;
141              
142 63         2246 my $annotation = $self->annotate_groups_obj->consensus_product_for_id_names($genes);
143 63         2086 my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
144              
145 63         246 my $duplicate_gene_name = $self->_non_unique_name_for_group($annotated_group_name);
146            
147 63         159 my $genome_number = '';
148 63         123 my $qc_comment = '';
149 63         102 my $order_within_fragement = '';
150 63         112 my $accessory_order_within_fragement = '';
151 63         117 my $accessory_genome_number = '';
152 63 50 66     2047 if(defined($self->groups_to_contigs) && defined($self->groups_to_contigs->{$annotated_group_name}))
153             {
154 0         0 $genome_number = $self->groups_to_contigs->{$annotated_group_name}->{label};
155 0         0 $qc_comment = $self->groups_to_contigs->{$annotated_group_name}->{comment};
156 0         0 $order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{order};
157            
158 0         0 $accessory_genome_number = $self->groups_to_contigs->{$annotated_group_name}->{accessory_label};
159 0         0 $accessory_order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{accessory_order};
160             }
161            
162 63         1899 my $group_size = $self->annotate_groups_obj->group_nucleotide_lengths->{$group};
163            
164             my @row = (
165             $annotated_group_name, $duplicate_gene_name, $annotation,
166             $num_isolates_in_group, $num_sequences_in_group, $avg_sequences_per_isolate,$genome_number,$order_within_fragement,$accessory_genome_number,$accessory_order_within_fragement,$qc_comment,$group_size->{min}, $group_size->{max}, $group_size->{average}
167 63         375 );
168            
169 63         258 for(my $i =0; $i < @row; $i++)
170             {
171 882 100       2103 if(!defined($row[$i]))
172             {
173 135         363 $row[$i] = '';
174             }
175             }
176              
177 63         121 for my $filename ( @{ $self->_sorted_file_names } ) {
  63         2103  
178 196         6122 my $group_to_file_genes = $self->_groups_to_files->{$group}->{$filename};
179              
180 196 100 66     566 if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) {
  109         401  
181              
182 109         243 push( @row, join( "\t", @{$group_to_file_genes} ) );
  109         420  
183 109         347 next;
184             }
185             else {
186 87         314 push( @row, '' );
187             }
188             }
189              
190             ## ADD INFERENCE AND FULL ANNOTATION IF VERBOSE REQUESTED ##
191 63 100       1828 if ( $self->_verbose ){
192 7         15 my ( $full_annotation, $inference );
193 7         162 $row[2] = $self->annotate_groups_obj->full_annotation($group);
194 7         201 push( @row, $self->annotate_groups_obj->inference($group) );
195             }
196              
197 63         1215 return \@row;
198             }
199              
200             sub create_rtab
201             {
202 1     1 0 4 my ($self) = @_;
203 1         50 my $presence_absence_matrix_obj = Bio::Roary::PresenceAbsenceMatrix->new(
204             output_filename => $self->output_rtab_filename,
205             annotate_groups_obj => $self->annotate_groups_obj,
206             sorted_file_names => $self->_sorted_file_names,
207             groups_to_files => $self->_groups_to_files,
208             num_files_in_groups => $self->_num_files_in_groups,
209             sample_headers => $self->_sample_headers,
210             );
211 1         6 $presence_absence_matrix_obj->create_matrix_file;
212 1         29 return $self;
213             }
214              
215             sub create_spreadsheet {
216 24     24 0 90 my ($self) = @_;
217              
218 24         1182 $self->_text_csv_obj->print( $self->_output_fh, $self->_header );
219              
220 24 50       501 for my $group (sort {$self->_num_files_in_groups->{$b}<=>$self->_num_files_in_groups->{$a} || $a cmp $b} keys %{$self->_num_files_in_groups}){
  72         2493  
  24         1000  
221 63         2580 $self->_text_csv_obj->print( $self->_output_fh, $self->_row($group) );
222             }
223 24         1092 close( $self->_output_fh );
224             }
225              
226 10     10   104 no Moose;
  10         26  
  10         62  
227             __PACKAGE__->meta->make_immutable;
228              
229             1;
230              
231             __END__
232              
233             =pod
234              
235             =encoding UTF-8
236              
237             =head1 NAME
238              
239             Bio::Roary::GroupStatistics - Add labels to the groups
240              
241             =head1 VERSION
242              
243             version 3.10.1
244              
245             =head1 SYNOPSIS
246              
247             Add labels to the groups
248             use Bio::Roary::GroupStatistics;
249              
250             my $obj = Bio::Roary::GroupStatistics->new(
251             output_filename => 'group_statitics.csv',
252             annotate_groups_obj => $annotate_groups_obj,
253             analyse_groups_obj => $analyse_groups_obj
254             );
255             $obj->create_spreadsheet;
256              
257             =head1 AUTHOR
258              
259             Andrew J. Page <ap13@sanger.ac.uk>
260              
261             =head1 COPYRIGHT AND LICENSE
262              
263             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
264              
265             This is free software, licensed under:
266              
267             The GNU General Public License, Version 3, June 2007
268              
269             =cut