File Coverage

lib/Bio/Roary/GroupStatistics.pm

Criterion	Covered	Total	%
statement	138	144	95.8
branch	14	18	77.7
condition	4	6	66.6
subroutine	23	23	100.0
pod	0	3	0.0
total	179	194	92.2

line	stmt	bran	cond	sub	pod	time	code
1							package Bio::Roary::GroupStatistics;
2							$Bio::Roary::GroupStatistics::VERSION = '3.10.2';
3							# ABSTRACT: Add labels to the groups
4
5
6	10			10		82799	use Moose;
	10					370065
	10					89
7	10			10		66367	use POSIX;
	10					40807
	10					66
8	10			10		24278	use Text::CSV;
	10					53502
	10					435
9	10			10		72	use File::Basename;
	10					33
	10					622
10	10			10		2759	use Bio::SeqIO;
	10					243953
	10					352
11	10			10		1567	use Bio::Roary::Exceptions;
	10					28
	10					224
12	10			10		1428	use Bio::Roary::AnalyseGroups;
	10					52
	10					399
13	10			10		2699	use Bio::Roary::AnnotateGroups;
	10					29
	10					316
14	10			10		3459	use Bio::Roary::PresenceAbsenceMatrix;
	10					31
	10					12562
15
16							has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
17							has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
18							has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.csv' );
19							has 'output_rtab_filename' => ( is => 'ro', isa => 'Str', default => 'gene_presence_absence.Rtab' );
20							has 'groups_to_contigs' => ( is => 'ro', isa => 'Maybe[HashRef]');
21							has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
22							has '_text_csv_obj' => ( is => 'ro', isa => 'Text::CSV', lazy => 1, builder => '_build__text_csv_obj' );
23							has '_sorted_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__sorted_file_names' );
24							has '_groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
25							has '_files_to_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__files_to_groups' );
26							has '_num_files_in_groups' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__num_files_in_groups' );
27							has '_verbose' => ( is => 'ro', isa => 'Bool', default => 0 );
28
29
30							sub _build__output_fh {
31	24			24		100	my ($self) = @_;
32	24	50				1088	open( my $fh, '>', $self->output_filename )
33							or Bio::Roary::Exceptions::CouldntWriteToFile->throw(
34							error => "Couldnt write output file:" . $self->output_filename );
35	24					819	return $fh;
36							}
37
38							sub _build__text_csv_obj {
39	24			24		121	my ($self) = @_;
40	24					706	return Text::CSV->new( { binary => 1, always_quote => 1, eol => "\r\n" } );
41							}
42
43							sub fixed_headers {
44	435			435	0	552	my ($self) = @_;
45	435					1291	my @header =
46							( 'Gene', 'Non-unique Gene name', 'Annotation', 'No. isolates', 'No. sequences', 'Avg sequences per isolate', 'Genome Fragment','Order within Fragment', 'Accessory Fragment','Accessory Order with Fragment', 'QC','Min group size nuc', 'Max group size nuc', 'Avg group size nuc' );
47	435					1260	return \@header;
48							}
49
50							sub _sample_headers
51							{
52	25			25		95	my ($self) = @_;
53	25					43	my @header;
54	25					44	for my $filename ( @{ $self->_sorted_file_names } ) {
	25					836
55	76					2949	my $filename_cpy = basename($filename);
56	76					282	$filename_cpy =~ s!\.gff\.proteome\.faa!!;
57	76					194	push( @header, $filename_cpy );
58							}
59	25					139	return \@header;
60							}
61
62							sub _header {
63	24			24		79	my ($self) = @_;
64	24					60	my @header = @{ $self->fixed_headers };
	24					133
65	24					78	push( @header, @{$self->_sample_headers});
	24					125
66	24	100				895	push( @header, 'Inference' ) if ( $self->_verbose );
67	24					802	return \@header;
68							}
69
70							sub _build__sorted_file_names {
71	25			25		78	my ($self) = @_;
72	25					58	my @sorted_file_names = sort( @{ $self->analyse_groups_obj->fasta_files } );
	25					758
73	25					749	return \@sorted_file_names;
74							}
75
76							sub _non_unique_name_for_group {
77	63			63		138	my ( $self, $annotated_group_name ) = @_;
78	63					130	my $duplicate_gene_name = '';
79	63					1614	my $prefix = $self->annotate_groups_obj->_group_default_prefix;
80	63	100				350	if ( $annotated_group_name =~ /$prefix/ ) {
81	51					1354	my $non_unique_name_for_group =
82							$self->annotate_groups_obj->_consensus_gene_name_for_group($annotated_group_name);
83	51	50				299	if ( !( $non_unique_name_for_group =~ /$prefix/ ) ) {
84	0					0	$duplicate_gene_name = $non_unique_name_for_group;
85							}
86							}
87	63					160	return $duplicate_gene_name;
88							}
89
90							sub _build__groups_to_files {
91	22			22		60	my ($self) = @_;
92	22					42	my %groups_to_files;
93	22					36	for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
	22					661
94	70					1873	my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
95	70					107	my %filenames;
96	70					95	for my $gene_name ( @{$genes} ) {
	70					131
97	121					3054	my $filename = $self->analyse_groups_obj->_genes_to_file->{$gene_name};
98	121					192	push( @{ $filenames{$filename} }, $gene_name );
	121					471
99							}
100	70					213	$groups_to_files{$group} = \%filenames;
101							}
102
103	22					615	return \%groups_to_files;
104							}
105
106							sub _build__files_to_groups
107							{
108	1			1		3	my ($self) = @_;
109	1					1	my %files_to_groups;
110
111	1					2	for my $group (keys %{$self->_groups_to_files})
	1					21
112							{
113	7					7	for my $filename (keys %{$self->_groups_to_files->{$group}})
	7					127
114							{
115	12					14	push(@{$files_to_groups{$filename}}, $group);
	12					40
116							}
117							}
118
119	1					19	return \%files_to_groups;
120							}
121
122							sub _build__num_files_in_groups
123							{
124	24			24		61	my ($self) = @_;
125	24					59	my %num_files_in_groups;
126	24					52	for my $group (@{ $self->annotate_groups_obj->_groups })
	24					807
127							{
128	63					1677	my $num_files = $self->analyse_groups_obj->_count_num_files_in_group( $self->annotate_groups_obj->_groups_to_id_names->{$group});
129	63					185	$num_files_in_groups{$group} = $num_files;
130							}
131	24					724	return \%num_files_in_groups;
132							}
133
134							sub _row {
135	63			63		155	my ( $self, $group ) = @_;
136	63					1672	my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
137
138	63					1661	my $num_isolates_in_group = $self->analyse_groups_obj->_count_num_files_in_group($genes);
139	63					123	my $num_sequences_in_group = $#{$genes} + 1;
	63					149
140	63					441	my $avg_sequences_per_isolate = ceil( ( $num_sequences_in_group / $num_isolates_in_group ) * 100 ) / 100;
141
142	63					1849	my $annotation = $self->annotate_groups_obj->consensus_product_for_id_names($genes);
143	63					1766	my $annotated_group_name = $self->annotate_groups_obj->_groups_to_consensus_gene_names->{$group};
144
145	63					214	my $duplicate_gene_name = $self->_non_unique_name_for_group($annotated_group_name);
146
147	63					136	my $genome_number = '';
148	63					119	my $qc_comment = '';
149	63					116	my $order_within_fragement = '';
150	63					92	my $accessory_order_within_fragement = '';
151	63					92	my $accessory_genome_number = '';
152	63	50	66			1889	if(defined($self->groups_to_contigs) && defined($self->groups_to_contigs->{$annotated_group_name}))
153							{
154	0					0	$genome_number = $self->groups_to_contigs->{$annotated_group_name}->{label};
155	0					0	$qc_comment = $self->groups_to_contigs->{$annotated_group_name}->{comment};
156	0					0	$order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{order};
157
158	0					0	$accessory_genome_number = $self->groups_to_contigs->{$annotated_group_name}->{accessory_label};
159	0					0	$accessory_order_within_fragement = $self->groups_to_contigs->{$annotated_group_name}->{accessory_order};
160							}
161
162	63					1578	my $group_size = $self->annotate_groups_obj->group_nucleotide_lengths->{$group};
163
164							my @row = (
165							$annotated_group_name, $duplicate_gene_name, $annotation,
166							$num_isolates_in_group, $num_sequences_in_group, $avg_sequences_per_isolate,$genome_number,$order_within_fragement,$accessory_genome_number,$accessory_order_within_fragement,$qc_comment,$group_size->{min}, $group_size->{max}, $group_size->{average}
167	63					312	);
168
169	63					216	for(my $i =0; $i < @row; $i++)
170							{
171	882	100				2148	if(!defined($row[$i]))
172							{
173	135					310	$row[$i] = '';
174							}
175							}
176
177	63					110	for my $filename ( @{ $self->_sorted_file_names } ) {
	63					1732
178	196					4996	my $group_to_file_genes = $self->_groups_to_files->{$group}->{$filename};
179
180	196	100	66			528	if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) {
	109					372
181
182	109					169	push( @row, join( "\t", @{$group_to_file_genes} ) );
	109					384
183	109					237	next;
184							}
185							else {
186	87					290	push( @row, '' );
187							}
188							}
189
190							## ADD INFERENCE AND FULL ANNOTATION IF VERBOSE REQUESTED ##
191	63	100				1553	if ( $self->_verbose ){
192	7					16	my ( $full_annotation, $inference );
193	7					124	$row[2] = $self->annotate_groups_obj->full_annotation($group);
194	7					137	push( @row, $self->annotate_groups_obj->inference($group) );
195							}
196
197	63					1080	return \@row;
198							}
199
200							sub create_rtab
201							{
202	1			1	0	3	my ($self) = @_;
203	1					32	my $presence_absence_matrix_obj = Bio::Roary::PresenceAbsenceMatrix->new(
204							output_filename => $self->output_rtab_filename,
205							annotate_groups_obj => $self->annotate_groups_obj,
206							sorted_file_names => $self->_sorted_file_names,
207							groups_to_files => $self->_groups_to_files,
208							num_files_in_groups => $self->_num_files_in_groups,
209							sample_headers => $self->_sample_headers,
210							);
211	1					7	$presence_absence_matrix_obj->create_matrix_file;
212	1					27	return $self;
213							}
214
215							sub create_spreadsheet {
216	24			24	0	82	my ($self) = @_;
217
218	24					977	$self->_text_csv_obj->print( $self->_output_fh, $self->_header );
219
220	24	50				477	for my $group (sort {$self->_num_files_in_groups->{$b}<=>$self->_num_files_in_groups->{$a} \|\| $a cmp $b} keys %{$self->_num_files_in_groups}){
	67					1609
	24					897
221	63					2187	$self->_text_csv_obj->print( $self->_output_fh, $self->_row($group) );
222							}
223	24					1079	close( $self->_output_fh );
224							}
225
226	10			10		98	no Moose;
	10					23
	10					68
227							__PACKAGE__->meta->make_immutable;
228
229							1;
230
231							__END__
232
233							=pod
234
235							=encoding UTF-8
236
237							=head1 NAME
238
239							Bio::Roary::GroupStatistics - Add labels to the groups
240
241							=head1 VERSION
242
243							version 3.10.2
244
245							=head1 SYNOPSIS
246
247							Add labels to the groups
248							use Bio::Roary::GroupStatistics;
249
250							my $obj = Bio::Roary::GroupStatistics->new(
251							output_filename => 'group_statistics.csv',
252							annotate_groups_obj => $annotate_groups_obj,
253							analyse_groups_obj => $analyse_groups_obj
254							);
255							$obj->create_spreadsheet;
256
257							=head1 AUTHOR
258
259							Andrew J. Page <ap13@sanger.ac.uk>
260
261							=head1 COPYRIGHT AND LICENSE
262
263							This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
264
265							This is free software, licensed under:
266
267							The GNU General Public License, Version 3, June 2007
268
269							=cut