File Coverage

lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm
Criterion Covered Total %
statement 24 88 27.2
branch 0 54 0.0
condition 0 6 0.0
subroutine 8 14 57.1
pod 0 3 0.0
total 32 165 19.3


line stmt bran cond sub pod time code
1             undef $VERSION;
2             package Bio::Roary::CommandLine::RoaryPostAnalysis;
3             $Bio::Roary::CommandLine::RoaryPostAnalysis::VERSION = '3.11.0';
4             # ABSTRACT: Perform the post analysis on the pan genome
5              
6              
7 1     1   443002 use Moose;
  1         8  
  1         6  
8 1     1   6144 use Getopt::Long qw(GetOptionsFromArray);
  1         7784  
  1         4  
9 1     1   401 use Bio::Roary::PostAnalysis;
  1         3  
  1         43  
10 1     1   556 use File::Find::Rule;
  1         6116  
  1         7  
11 1     1   346 use Bio::Roary::External::GeneAlignmentFromNucleotides;
  1         3  
  1         44  
12 1     1   8 use File::Path qw(remove_tree);
  1         2  
  1         86  
13 1     1   7 use Bio::Roary::External::Fasttree;
  1         2  
  1         804  
# Parent class. $self->logger is used by this package but not declared in it,
# so it is presumably provided by this parent -- confirm against Common.pm.
extends 'Bio::Roary::CommandLine::Common';

# --- Invocation data (must be supplied to the constructor) -----------------
has args           => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has script_name    => ( is => 'ro', isa => 'Str',      required => 1 );
has help           => ( is => 'rw', isa => 'Bool',     default  => 0 );
has _error_message => ( is => 'rw', isa => 'Str' );

# --- File names, overridable from the command line -------------------------
has fasta_files                 => ( is => 'rw', isa => 'Str', default => '_fasta_files' );
has input_files                 => ( is => 'rw', isa => 'Str', default => '_gff_files' );
has output_filename             => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
has output_pan_geneome_filename => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
has output_statistics_filename  => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
has output_multifasta_files     => ( is => 'rw', isa => 'Bool', default => 0 );
has clusters_filename           => ( is => 'rw', isa => 'Str', default => '_clustered.clstr' );

# --- Execution settings -----------------------------------------------------
has job_runner => ( is => 'rw', isa => 'Str', default => 'Local' );
has cpus       => ( is => 'rw', isa => 'Int', default => 1 );

# --- Behaviour switches and tuning values -----------------------------------
has dont_delete_files  => ( is => 'rw', isa => 'Bool', default => 0 );
has dont_create_rplots => ( is => 'rw', isa => 'Bool', default => 0 );
has dont_split_groups  => ( is => 'rw', isa => 'Bool', default => 0 );
has verbose_stats      => ( is => 'rw', isa => 'Bool', default => 0 );
has translation_table  => ( is => 'rw', isa => 'Int',  default => 11 );
has group_limit        => ( is => 'rw', isa => 'Num',  default => 50000 );

# Stored as a fraction; BUILD divides the command-line value by 100.
has core_definition => ( is => 'rw', isa => 'Num',  default => 0.99 );
has verbose         => ( is => 'rw', isa => 'Bool', default => 0 );
has mafft           => ( is => 'rw', isa => 'Bool', default => 0 );
has allow_paralogs  => ( is => 'rw', isa => 'Bool', default => 0 );
40              
# Construction hook: parses the command-line switches held in the `args`
# attribute and copies every switch that was actually supplied onto the
# attribute of the same name. Switches that were not given leave the
# attribute defaults untouched.
sub BUILD {
    my ($self) = @_;

    # One slot per attribute name. GetOptionsFromArray only writes to a slot
    # when the matching switch is present, so untouched slots stay undef.
    my %given;

    GetOptionsFromArray(
        $self->args,
        'o|output=s'                => \$given{output_filename},
        'j|job_runner=s'            => \$given{job_runner},
        'm|output_multifasta_files' => \$given{output_multifasta_files},
        'p=s'                       => \$given{output_pan_geneome_filename},
        's=s'                       => \$given{output_statistics_filename},
        'c=s'                       => \$given{clusters_filename},
        'f=s'                       => \$given{fasta_files},
        'i=s'                       => \$given{input_files},
        'a|dont_delete_files'       => \$given{dont_delete_files},
        'b|dont_create_rplots'      => \$given{dont_create_rplots},
        'd|dont_split_groups'       => \$given{dont_split_groups},
        'e|verbose_stats'           => \$given{verbose_stats},
        'z|processors=i'            => \$given{cpus},
        't|translation_table=i'     => \$given{translation_table},
        'g|group_limit=i'           => \$given{group_limit},
        'cd|core_definition=f'      => \$given{core_definition},
        'v|verbose'                 => \$given{verbose},
        'n|mafft'                   => \$given{mafft},
        'q|allow_paralogs'          => \$given{allow_paralogs},
        'h|help'                    => \$given{help},
    );

    # The switch value is divided by 100 before it reaches the attribute.
    if ( defined $given{core_definition} ) {
        $given{core_definition} = $given{core_definition} / 100;
    }

    # Setters are invoked in this fixed order.
    my @copy_order = qw(
      help
      job_runner
      fasta_files
      input_files
      output_filename
      output_pan_geneome_filename
      output_statistics_filename
      output_multifasta_files
      clusters_filename
      dont_delete_files
      dont_create_rplots
      dont_split_groups
      verbose_stats
      translation_table
      cpus
      group_limit
      core_definition
      mafft
      allow_paralogs
    );

    for my $attribute (@copy_order) {
        my $value = $given{$attribute};
        next if !defined $value;
        $self->$attribute($value);
    }

    # Verbose additionally sets the level on the logger object.
    if ( defined $given{verbose} ) {
        $self->verbose( $given{verbose} );
        $self->logger->level(10000);
    }
}
99              
# Entry point. Runs Bio::Roary::PostAnalysis over the files listed in the
# input/fasta list files, removes intermediate files unless asked not to,
# and, when multi-FASTA output was requested, aligns the files found by
# _find_input_files.
#
# Dies with the usage text when help was requested or when an error message
# has been recorded on the object.
sub run {
    my ($self) = @_;

    die $self->usage_text if $self->help;

    if ( defined( $self->_error_message ) ) {
        print $self->_error_message . "\n";
        die $self->usage_text;
    }

    # Both attributes name a file that holds one filename per line.
    my $input_files = $self->_read_file_into_array( $self->input_files );
    my $fasta_files = $self->_read_file_into_array( $self->fasta_files );

    my %post_analysis_args = (
        fasta_files                 => $fasta_files,
        input_files                 => $input_files,
        output_filename             => $self->output_filename,
        output_pan_geneome_filename => $self->output_pan_geneome_filename,
        output_statistics_filename  => $self->output_statistics_filename,
        output_multifasta_files     => $self->output_multifasta_files,
        clusters_filename           => $self->clusters_filename,
        dont_delete_files           => $self->dont_delete_files,
        dont_create_rplots          => $self->dont_create_rplots,
        dont_split_groups           => $self->dont_split_groups,
        verbose_stats               => $self->verbose_stats,
        group_limit                 => $self->group_limit,
        verbose                     => $self->verbose,
        cpus                        => $self->cpus,
        logger                      => $self->logger,
        core_definition             => $self->core_definition,
    );
    my $post_analysis = Bio::Roary::PostAnalysis->new(%post_analysis_args);
    $post_analysis->run();

    # Clean up intermediate artefacts in the current working directory.
    unless ( $self->dont_delete_files ) {
        unlink('_inflated_unsplit_mcl_groups');
        remove_tree('split_groups');
    }

    if ( $self->output_multifasta_files ) {
        print "Aligning each cluster\n" if ( $self->verbose );

        # Only an explicitly requested LSF runner whose module can be loaded
        # is honoured; every other setting falls back to 'Parallel'.
        my $job_runner_to_use =
          ( $self->_is_lsf_job_runner_available && $self->job_runner eq "LSF" )
          ? $self->job_runner
          : 'Parallel';

        my $output_gene_files = $self->_find_input_files;

        my $aligner = Bio::Roary::External::GeneAlignmentFromNucleotides->new(
            fasta_files       => $output_gene_files,
            job_runner        => $job_runner_to_use,
            translation_table => $self->translation_table,
            core_definition   => $self->core_definition,
            cpus              => $self->cpus,
            verbose           => $self->verbose,
            mafft             => $self->mafft,
            allow_paralogs    => $self->allow_paralogs,
            dont_delete_files => $self->dont_delete_files,

            # NOTE(review): $#{...} is the last index, i.e. the number of
            # input files minus one -- confirm this is what the consumer of
            # num_input_files expects.
            num_input_files => $#{$input_files},
        );
        $aligner->run();
    }
}
166              
# Reports whether the LSF job runner module can be loaded.
#
# Returns 1 when Bio::Roary::JobRunner::LSF loads (or is already loaded),
# and 0 when loading it fails for any reason.
sub _is_lsf_job_runner_available {
    my ($self) = @_;

    # The module name is fixed, so a block eval is sufficient: it traps a
    # failed require just like the string form, without compiling code from
    # a string at run time.
    my $loaded = eval {
        require Bio::Roary::JobRunner::LSF;
        1;
    };

    return $loaded ? 1 : 0;
}
180              
# Looks under the 'pan_genome_sequences' directory (relative to the current
# working directory) for plain files whose name matches '*.fa'.
#
# Returns an array reference of the paths found, in the order the finder
# reports them.
sub _find_input_files {
    my ($self) = @_;

    my $finder = File::Find::Rule->new;
    $finder->file;
    $finder->name('*.fa');

    my @fasta_paths = $finder->in('pan_genome_sequences');
    return \@fasta_paths;
}
189              
# Reads a text file and returns its lines, without trailing newlines, as an
# array reference (one element per line, blank lines included).
#
# Parameters:
#   $filename - path of the file to read.
#
# Dies when the file cannot be opened; previously a failed open went
# unnoticed and an empty list was returned.
sub _read_file_into_array {
    my ( $self, $filename ) = @_;

    # Three-argument open: the name is always treated as a path, so leading
    # characters such as '|' or '>' cannot change the open mode.
    open( my $in_fh, '<', $filename )
      or die "Cannot open '$filename' for reading: $!\n";

    my @filenames;

    # A lexical line variable keeps the caller's $_ intact.
    while ( my $line = <$in_fh> ) {
        chomp $line;
        push( @filenames, $line );
    }
    close($in_fh);

    return \@filenames;
}
203              
# Returns the help text for the command-line script as a single string.
#
# The -cd default is shown as a percentage (99): BUILD divides the value
# given on the command line by 100 before storing it, so the switch expects
# a percentage rather than the stored fraction 0.99.
sub usage_text {
    my ($self) = @_;

    return <<USAGE;
Usage: pan_genome_post_analysis [options]
Perform the post analysis on the pan genome. This script is usually only called by another script.

Options: -a        dont delete intermediate files
         -b        dont create R plots
         -c STR    clusters filename [_clustered.clstr]
         -cd FLOAT percentage of isolates a gene must be in to be core [99]
         -d        dont split groups
         -e        add inference values to gene presence and absence spreadsheet
         -f STR    file of protein filenames [_fasta_files]
         -g INT    maximum number of clusters [50000]
         -i STR    file of GFF filenames [_gff_files]
         -m        core gene alignment with PRANK
         -n        fast core gene alignment with MAFFT instead of PRANK
         -o STR    clusters output filename [clustered_proteins]
         -p STR    output pan genome filename [pan_genome.fa]
         -q        allow paralogs in core alignment
         -s STR    output gene presence and absence filename [gene_presence_absence.csv]
         -t INT    translation table [11]
         -z INT    number of threads [1]
         -v        verbose output to STDOUT
         -h        this help message

For further info see: http://sanger-pathogens.github.io/Roary/
USAGE
}
234              
# Finalise the class definition, then remove the Moose keywords
# (has, extends, ...) from this package's namespace.
__PACKAGE__->meta->make_immutable();
no Moose;

# A module file must end with a true value.
1;
238              
239             __END__
240              
241             =pod
242              
243             =encoding UTF-8
244              
245             =head1 NAME
246              
247             Bio::Roary::CommandLine::RoaryPostAnalysis - Perform the post analysis on the pan genome
248              
249             =head1 VERSION
250              
251             version 3.11.0
252              
253             =head1 SYNOPSIS
254              
255             Perform the post analysis on the pan genome
256              
257             =head1 AUTHOR
258              
259             Andrew J. Page <ap13@sanger.ac.uk>
260              
261             =head1 COPYRIGHT AND LICENSE
262              
263             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
264              
265             This is free software, licensed under:
266              
267             The GNU General Public License, Version 3, June 2007
268              
269             =cut