File Coverage

lib/Bio/Roary/QC/Report.pm
Criterion Covered Total %
statement 47 118 39.8
branch 0 6 0.0
condition 0 13 0.0
subroutine 13 22 59.0
pod 0 1 0.0
total 60 160 37.5


line stmt bran cond sub pod time code
1             package Bio::Roary::QC::Report;
2             $Bio::Roary::QC::Report::VERSION = '3.10.2';
3             # ABSTRACT: generate a report based on kraken output
4              
5              
6 2     2   93012 use Moose;
  2         373971  
  2         22  
7 2     2   20160 use File::Temp;
  2         12417  
  2         212  
8 2     2   17 use File::Path 'rmtree';
  2         6  
  2         103  
9 2     2   16 use Cwd;
  2         5  
  2         122  
10 2     2   15 use File::Basename;
  2         5  
  2         3585  
11             with 'Bio::Roary::JobRunner::Role';
12              
13             has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
14             has 'kraken_exec' => ( is => 'ro', isa => 'Str', default => 'kraken' );
15             has 'kraken_report_exec' => ( is => 'ro', isa => 'Str', default => 'kraken-report' );
16             has 'kraken_db' => ( is => 'ro', isa => 'Str', required => 1 );
17             has 'outfile' => ( is => 'rw', isa => 'Str', default => 'qc_report.csv' );
18             has '_kraken_data' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 );
19             has '_header' => ( is => 'rw', isa => 'Str', lazy_build => 1 );
20             has 'kraken_memory' => ( is => 'rw', isa => 'Int', default => 2000 );
21              
22             has '_tmp_directory_obj' => ( is => 'rw', lazy_build => 1 );
23             has '_tmp_directory' => ( is => 'rw', lazy_build => 1, isa => 'Str', );
24              
25              
26             sub _nuc_fasta_filename
27             {
28 6     6   8 my ($self, $gff) = @_;
29              
30 6         166 my $prefix = basename( $gff, ".gff" );
31 6         109 my $outfile = $self->_tmp_directory . "/$prefix.fna";
32 6         12 return $outfile;
33             }
34              
35             sub _extract_nuc_fasta_cmd {
36 3     3   6 my ($self, $gff) = @_;
37 3         6 my $outfile = $self->_nuc_fasta_filename($gff);
38 3         7 my $cmd = "sed -n '/##FASTA/,//p' $gff | grep -v \'##FASTA\' > $outfile";
39              
40 3         8 return $cmd;
41             }
42              
43             sub _extract_nuc_files_from_all_gffs
44             {
45 1     1   2 my ($self) = @_;
46 1         2 my @nuc_files;
47             my @commands_to_run;
48 1         1 for my $input_file(@{$self->input_files})
  1         24  
49             {
50 2         5 push(@nuc_files,$self->_nuc_fasta_filename($input_file));
51 2         3 push(@commands_to_run,$self->_extract_nuc_fasta_cmd($input_file));
52             }
53 1         19 my $kraken_runner_obj = $self->_job_runner_class->new(
54             commands_to_run => \@commands_to_run,
55             memory_in_mb => $self->kraken_memory,
56             verbose => $self->verbose,
57             cpus => $self->cpus
58             );
59 1         3 $kraken_runner_obj->run();
60 1         59 return \@nuc_files;
61             }
62              
63             sub _kraken_cmd {
64 0     0   0 my ( $self, $a, $kraken_output ) = @_;
65              
66 0         0 my $kcmd = $self->kraken_exec .
67             " --fasta-input ".
68             " --preload ".
69             " --db " . $self->kraken_db .
70             " --output $kraken_output $a > /dev/null 2>&1";
71 0         0 return $kcmd;
72             }
73              
74             sub _kraken_report_cmd {
75 0     0   0 my ( $self, $k, $report_output ) = @_;
76              
77 0         0 my $krcmd = $self->kraken_report_exec .
78             " --db " . $self->kraken_db .
79             " $k > $report_output";
80 0         0 return $krcmd;
81             }
82              
83             sub _kraken_output_filename
84             {
85 0     0   0 my ( $self, $assembly ) = @_;
86 0         0 my $kraken_output = $assembly;
87 0         0 $kraken_output =~ s/fna$/kraken/;
88 0         0 return $kraken_output;
89             }
90              
91             sub _run_kraken_on_nuc_files
92             {
93 0     0   0 my ( $self, $nuc_files ) = @_;
94 0         0 my @kraken_output_files;
95             my @commands_to_run;
96 0         0 for my $nuc_file(@{$nuc_files})
  0         0  
97             {
98 0         0 my $kraken_output = $self->_kraken_output_filename($nuc_file);
99 0         0 push(@kraken_output_files, $kraken_output );
100 0         0 push(@commands_to_run, $self->_kraken_cmd( $nuc_file, $kraken_output ));
101             }
102            
103 0         0 my $kraken_runner_obj = $self->_job_runner_class->new(
104             commands_to_run => \@commands_to_run,
105             memory_in_mb => $self->kraken_memory,
106             verbose => $self->verbose,
107             cpus => $self->cpus
108             );
109 0         0 $kraken_runner_obj->run();
110            
111 0         0 for my $filename(@{$nuc_files})
  0         0  
112             {
113 0         0 unlink($filename);
114             }
115            
116 0         0 return \@kraken_output_files;
117             }
118              
119             sub _kraken_report_output_filename
120             {
121 0     0   0 my ( $self, $assembly ) = @_;
122 0         0 return $assembly.".report";
123             }
124              
125             sub _run_kraken_report_on_kraken_files
126             {
127 0     0   0 my ( $self, $kraken_files ) = @_;
128            
129 0         0 my @kraken_report_output_files;
130             my @commands_to_run;
131 0         0 for my $nuc_file(@{$kraken_files})
  0         0  
132             {
133 0         0 my $kraken_output = $self->_kraken_report_output_filename($nuc_file);
134 0         0 push(@kraken_report_output_files, $kraken_output );
135 0         0 push(@commands_to_run, $self->_kraken_report_cmd( $nuc_file, $kraken_output ));
136             }
137            
138 0         0 my $kraken_runner_obj = $self->_job_runner_class->new(
139             commands_to_run => \@commands_to_run,
140             memory_in_mb => $self->kraken_memory,
141             verbose => $self->verbose,
142             cpus => $self->cpus
143             );
144 0         0 $kraken_runner_obj->run();
145 0         0 for my $filename(@{$kraken_files})
  0         0  
146             {
147 0         0 unlink($filename);
148             }
149 0         0 return \@kraken_report_output_files;
150             }
151              
152             sub _build__kraken_data {
153 0     0   0 my $self = shift;
154 0         0 my $nuc_files = $self->_extract_nuc_files_from_all_gffs();
155 0         0 my $kraken_files = $self->_run_kraken_on_nuc_files($nuc_files);
156 0         0 my $kraken_report_files = $self->_run_kraken_report_on_kraken_files( $kraken_files );
157            
158 0         0 return $self->_parse_kraken_reports($kraken_report_files);
159             }
160              
161             sub _parse_kraken_reports
162             {
163 0     0   0 my ( $self, $kraken_report_files ) = @_;
164            
165 0         0 my @report_rows;
166 0         0 for my $kraken_report(@{$kraken_report_files})
  0         0  
167             {
168 0         0 push(@report_rows, $self->_parse_kraken_report($kraken_report));
169             }
170            
171 0         0 for my $kraken_report(@{$kraken_report_files})
  0         0  
172             {
173 0         0 unlink($kraken_report);
174             }
175            
176 0         0 return \@report_rows;
177             }
178              
179             sub _parse_kraken_report {
180 0     0   0 my ( $self, $kraken_report ) = @_;
181              
182             # parse report
183 0         0 open( my $report_fh, '<', $kraken_report );
184            
185 0         0 my $sample_name = $kraken_report;
186 0         0 $sample_name =~ s/.report$//;
187 0         0 $sample_name =~ s/.kraken$//;
188 0         0 my($sample_base_name, $dirs, $suffix) = fileparse($sample_name);
189            
190 0         0 my ( $top_genus, $top_species );
191 0         0 while ( <$report_fh> ){
192 0         0 my @parts = split( "\t" );
193 0         0 chomp @parts;
194              
195 0 0 0     0 $top_genus = $parts[5] if ( (! defined $top_genus) && $parts[3] eq 'G' );
196 0 0 0     0 $top_species = $parts[5] if ( (! defined $top_species) && $parts[3] eq 'S' );
197              
198 0 0 0     0 last if (defined $top_genus && defined $top_species);
199             }
200 0         0 close($report_fh);
201              
202 0   0     0 $top_genus ||= "not_found";
203 0         0 $top_genus =~ s/^\s+//g;
204 0   0     0 $top_species ||= "not_found";
205 0         0 $top_species =~ s/^\s+//g;
206              
207 0         0 return [ $sample_base_name, $top_genus, $top_species ];
208             }
209              
210              
211             sub _build__header {
212 1     1   16 return join( ',', ( 'Sample', 'Genus', 'Species' ) );
213             }
214              
215             sub _build__tmp_directory_obj {
216 1     1   16 return File::Temp->newdir(DIR => getcwd, CLEANUP => 1 );
217             }
218              
219             sub _build__tmp_directory {
220 1     1   1 my $self = shift;
221 1         22 return $self->_tmp_directory_obj->dirname();
222             }
223              
224             sub report {
225 1     1 0 2 my $self = shift;
226              
227 1         25 open( OUTFILE, '>', $self->outfile );
228 1         25 print OUTFILE $self->_header . "\n";
229 1         2 for my $line ( @{ $self->_kraken_data } ){
  1         18  
230 3         4 print OUTFILE join( ',', @{ $line } ) . "\n";
  3         8  
231             }
232 1         46 close OUTFILE;
233             }
234              
235              
236             __PACKAGE__->meta->make_immutable;
237 2     2   28 no Moose;
  2         6  
  2         25  
238             1;
239              
240             __END__
241              
242             =pod
243              
244             =encoding UTF-8
245              
246             =head1 NAME
247              
248             Bio::Roary::QC::Report - generate a report based on kraken output
249              
250             =head1 VERSION
251              
252             version 3.10.2
253              
254             =head1 SYNOPSIS
255              
256             =head1 AUTHOR
257              
258             Andrew J. Page <ap13@sanger.ac.uk>
259              
260             =head1 COPYRIGHT AND LICENSE
261              
262             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
263              
264             This is free software, licensed under:
265              
266             The GNU General Public License, Version 3, June 2007
267              
268             =cut