File Coverage

lib/Bio/Roary/AccessoryBinaryFasta.pm
Criterion Covered Total %
statement 56 69 81.1
branch 6 8 75.0
condition 5 6 83.3
subroutine 11 12 91.6
pod 0 1 0.0
total 78 96 81.2


line stmt bran cond sub pod time code
1             package Bio::Roary::AccessoryBinaryFasta;
2             $Bio::Roary::AccessoryBinaryFasta::VERSION = '3.11.0';
3             # ABSTRACT: Output a FASTA file which represents the binary presence and absence of genes in the accessory genome
4              
5              
6 2     2   92140 use Moose;
  2         376797  
  2         10  
7 2     2   12078 use POSIX;
  2         5407  
  2         12  
8 2     2   3718 use Bio::Roary::AnnotateGroups;
  2         6  
  2         57  
9 2     2   377 use Bio::Roary::AnalyseGroups;
  2         4  
  2         53  
10 2     2   13 use Bio::Roary::Exceptions;
  2         20  
  2         37  
11 2     2   494 use Bio::SeqIO;
  2         8842  
  2         57  
12 2     2   11 use File::Basename;
  2         2  
  2         1079  
13              
14             has 'input_files' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
15             has 'annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', required => 1 );
16             has 'analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
17             has 'output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory_binary_genes.fa' );
18             has 'lower_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 );
19             has 'upper_bound_percentage' => ( is => 'ro', isa => 'Int', default => 5 );
20             has 'max_accessory_to_include' => ( is => 'ro', isa => 'Int', default => 4000 );
21             has 'groups_to_files' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_to_files' );
22             has '_lower_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__lower_bound_value' );
23             has '_upper_bound_value' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build__upper_bound_value' );
24              
25             sub _build__groups_to_files {
26 0     0   0 my ($self) = @_;
27 0         0 my %groups_to_files;
28 0         0 for my $group ( @{ $self->annotate_groups_obj->_groups } ) {
  0         0  
29 0         0 my $genes = $self->annotate_groups_obj->_groups_to_id_names->{$group};
30 0         0 my %filenames;
31 0         0 for my $gene_name ( @{$genes} ) {
  0         0  
32 0         0 my $filename = $self->analyse_groups_obj->_genes_to_file->{$gene_name};
33 0         0 push( @{ $filenames{$filename} }, $gene_name );
  0         0  
34             }
35 0         0 $groups_to_files{$group} = \%filenames;
36             }
37              
38 0         0 return \%groups_to_files;
39             }
40              
41             sub _build__lower_bound_value {
42 1     1   2 my ($self) = @_;
43 1         2 my $num_files = @{ $self->input_files };
  1         25  
44 1         48 return ceil( $num_files * ( $self->lower_bound_percentage / 100 ) );
45             }
46              
47             sub _build__upper_bound_value {
48 1     1   2 my ($self) = @_;
49 1         3 my $num_files = @{ $self->input_files };
  1         21  
50 1         24 return $num_files - ceil( $num_files * ( $self->upper_bound_percentage / 100 ) );
51             }
52              
53             sub create_accessory_binary_fasta {
54 2     2 0 5 my ($self) = @_;
55 2         58 my $out_seq_io = Bio::SeqIO->new( -file => ">" . $self->output_filename, -format => 'Fasta' );
56              
57 2         4058 for my $full_filename ( @{ $self->input_files } ) {
  2         58  
58 8         2455 my($filename, $dirs, $suffix) = fileparse($full_filename);
59            
60 8         14 my $output_sequence = '';
61 8         9 my $sample_name = $filename;
62 8         11 $sample_name =~ s!\.gff\.proteome\.faa!!;
63              
64 8         8 my $gene_count = 0;
65 8         10 for my $group ( sort keys %{ $self->groups_to_files } ) {
  8         189  
66 32 50       657 last if($gene_count > $self->max_accessory_to_include);
67              
68 32         35 my @files = keys %{ $self->groups_to_files->{$group} };
  32         591  
69              
70 32 100 100     652 next if ( @files <= $self->_lower_bound_value || @files > $self->_upper_bound_value );
71              
72 24         486 my $group_to_file_genes = $self->groups_to_files->{$group}->{$full_filename};
73 24 100 66     47 if ( defined($group_to_file_genes) && @{$group_to_file_genes} > 0 ) {
  15         36  
74 15         19 $output_sequence .= 'A';
75             }
76             else {
77 9         13 $output_sequence .= 'C';
78             }
79 24         37 $gene_count++;
80            
81             }
82 8 50       17 next if($output_sequence eq '');
83 8         38 $out_seq_io->write_seq( Bio::Seq->new( -display_id => $sample_name, -seq => $output_sequence ) );
84             }
85 2         748 return 1;
86             }
87              
88 2     2   14 no Moose;
  2         6  
  2         14  
89             __PACKAGE__->meta->make_immutable;
90              
91             1;
92              
93             __END__
94              
95             =pod
96              
97             =encoding UTF-8
98              
99             =head1 NAME
100              
101             Bio::Roary::AccessoryBinaryFasta - Output a FASTA file which represents the binary presence and absence of genes in the accessory genome
102              
103             =head1 VERSION
104              
105             version 3.11.0
106              
107             =head1 SYNOPSIS
108              
109             Output a FASTA file which represents the binary presence and absence of genes in the accessory genome
110             use Bio::Roary::AccessoryBinaryFasta;
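111             my $obj = Bio::Roary::AccessoryBinaryFasta->new(input_files => ['abc','efg'], annotate_groups_obj => $annotate_groups_obj, analyse_groups_obj => $analyse_groups_obj,
112             groups_to_files => {'group_1' => {'abc' => ['gene_1']}, 'group_2' => {'abc' => ['gene_2'], 'efg' => ['gene_3']}}
113             );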
114             $obj->create_accessory_binary_fasta();
115              
116             =head1 AUTHOR
117              
118             Andrew J. Page <ap13@sanger.ac.uk>
119              
120             =head1 COPYRIGHT AND LICENSE
121              
122             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
123              
124             This is free software, licensed under:
125              
126             The GNU General Public License, Version 3, June 2007
127              
128             =cut