File Coverage

lib/Bio/Roary/PrepareInputFiles.pm
Criterion Covered Total %
statement 64 69 92.7
branch 5 8 62.5
condition n/a
subroutine 16 17 94.1
pod 0 2 0.0
total 85 96 88.5


line stmt bran cond sub pod time code
1             package Bio::Roary::PrepareInputFiles;
2             $Bio::Roary::PrepareInputFiles::VERSION = '3.11.0';
3             # ABSTRACT: Take in a mixture of FASTA and GFF input files and output FASTA proteomes only
4              
5              
6 4     4   92811 use Moose;
  4         378102  
  4         29  
7 4     4   24076 use Bio::Roary::Exceptions;
  4         9  
  4         119  
8 4     4   1037 use Bio::Roary::ExtractProteomeFromGFFs;
  4         13  
  4         167  
9 4     4   1730 use Bio::Roary::FilterUnknownsFromFasta;
  4         15  
  4         219  
10 4     4   38 use Cwd qw(getcwd);
  4         10  
  4         251  
11 4     4   21 use File::Temp;
  4         10  
  4         281  
12 4     4   73 use Log::Log4perl qw(:easy);
  4         8  
  4         38  
13              
14             has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
15             has 'job_runner' => ( is => 'ro', isa => 'Str', default => 'Local' );
16             has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
17             has '_input_gff_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_gff_files' );
18             has '_input_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files' );
19             has '_input_fasta_files_filtered' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__input_fasta_files_filtered' );
20             has '_input_fasta_files_filtered_obj' =>
21             ( is => 'ro', isa => 'Bio::Roary::FilterUnknownsFromFasta', lazy => 1, builder => '_build__input_fasta_files_filtered_obj' );
22              
23             has '_derived_fasta_files' => ( is => 'ro', isa => 'Maybe[ArrayRef]', lazy => 1, builder => '_build__derived_fasta_files' );
24             has '_extract_proteome_obj' => (
25             is => 'ro',
26             isa => 'Bio::Roary::ExtractProteomeFromGFFs',
27             lazy => 1,
28             builder => '_build__extract_proteome_obj'
29             );
30             has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 );
31             has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
32             has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
33             has '_fasta_filter_obj' => ( is => 'ro', isa => 'Bio::Roary::FilterUnknowsFromFasta', lazy => 1, builder => '_fasta_filter_obj' );
34             has 'working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
35             has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' );
36              
37             sub _build_logger {
38 0     0   0 my ($self) = @_;
39 0         0 Log::Log4perl->easy_init($ERROR);
40 0         0 my $logger = get_logger();
41 0         0 return $logger;
42             }
43              
44             sub _build__input_gff_files {
45 19     19   39 my ($self) = @_;
46 19         31 my @gff_files = grep( /\.gff$/, @{ $self->input_files } );
  19         379  
47 19         434 return \@gff_files;
48             }
49              
50             sub _build__input_fasta_files {
51 19     19   42 my ($self) = @_;
52 19         33 my @fasta_files = grep( !/\.gff$/, @{ $self->input_files } );
  19         497  
53              
54 19         38 my @validated_fasta_files;
55              
56 19         68 for my $fasta_file (@fasta_files) {
57 44         80 eval {
58 44         339 my $inseq = Bio::SeqIO->new(
59             -file => $fasta_file,
60             -format => 'fasta',
61             -alphabet => 'protein'
62             );
63 44         38493 while ( my $seq = $inseq->next_seq ) {
64              
65             # do something to force the reading.
66 183         23429 $seq->seq;
67             }
68             };
69 44 50       5362 if ($@) {
70 0         0 $self->logger->warn(
71             "Input file doesnt have a .gff extension and isnt a protein FASTA file so excluding it from further analysis: $fasta_file"
72             );
73             }
74             else {
75 44         117 push( @validated_fasta_files, $fasta_file );
76             }
77              
78             }
79              
80 19         627 return \@fasta_files;
81             }
82              
83             sub _build__input_fasta_files_filtered_obj {
84 19     19   41 my ($self) = @_;
85 19         421 return Bio::Roary::FilterUnknownsFromFasta->new( fasta_files => $self->_input_fasta_files );
86             }
87              
88             sub _build__input_fasta_files_filtered {
89 19     19   47 my ($self) = @_;
90 19 50       536 return undef if ( !defined( $self->_input_fasta_files ) );
91 19         526 return $self->_input_fasta_files_filtered_obj->filtered_fasta_files();
92             }
93              
94             sub _build__extract_proteome_obj {
95 19     19   38 my ($self) = @_;
96 19         387 return Bio::Roary::ExtractProteomeFromGFFs->new(
97             gff_files => $self->_input_gff_files,
98             job_runner => $self->job_runner,
99             apply_unknowns_filter => $self->apply_unknowns_filter,
100             translation_table => $self->translation_table,
101             cpus => $self->cpus,
102             verbose => $self->verbose,
103             working_directory => $self->working_directory,
104             );
105             }
106              
107             sub _build__derived_fasta_files {
108 19     19   51 my ($self) = @_;
109 19 50       452 return undef if ( !defined( $self->_input_gff_files ) );
110 19         434 return $self->_extract_proteome_obj->fasta_files();
111             }
112              
113             sub fasta_files {
114 59     59 0 200 my ($self) = @_;
115 59         118 my @output_fasta_files = ( @{ $self->_input_fasta_files_filtered }, @{ $self->_derived_fasta_files } );
  59         1941  
  59         1631  
116 59         1533 return \@output_fasta_files;
117             }
118              
119             sub lookup_fasta_files_from_unknown_input_files {
120 15     15 0 1199 my ( $self, $input_files ) = @_;
121 15         67 $self->fasta_files;
122              
123 15         40 my @output_fasta_files;
124 15         34 for my $input_file ( @{$input_files} ) {
  15         81  
125 23 100       632 if ( defined( $self->_extract_proteome_obj->fasta_files_to_gff_files->{$input_file} ) ) {
126 4         114 push( @output_fasta_files, $self->_extract_proteome_obj->fasta_files_to_gff_files->{$input_file} );
127             }
128             else {
129 19         608 push( @output_fasta_files, $self->_input_fasta_files_filtered_obj->input_fasta_to_output_fasta->{$input_file} );
130             }
131             }
132 15         613 return \@output_fasta_files;
133             }
134              
135 4     4   5815 no Moose;
  4         23  
  4         29  
136             __PACKAGE__->meta->make_immutable;
137              
138             1;
139              
140             __END__
141              
142             =pod
143              
144             =encoding UTF-8
145              
146             =head1 NAME
147              
148             Bio::Roary::PrepareInputFiles - Take in a mixture of FASTA and GFF input files and output FASTA proteomes only
149              
150             =head1 VERSION
151              
152             version 3.11.0
153              
154             =head1 SYNOPSIS
155              
156             Take in a mixture of FASTA and GFF input files and output FASTA proteomes only
157             use Bio::Roary::PrepareInputFiles;
158              
159             my $obj = Bio::Roary::PrepareInputFiles->new(
160             input_files => ['abc.gff','ddd.faa'],
161             );
162             $obj->fasta_files;
163              
164             =head1 AUTHOR
165              
166             Andrew J. Page <ap13@sanger.ac.uk>
167              
168             =head1 COPYRIGHT AND LICENSE
169              
170             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
171              
172             This is free software, licensed under:
173              
174             The GNU General Public License, Version 3, June 2007
175              
176             =cut