File Coverage

lib/Bio/MLST/OutputFasta.pm
Criterion Covered Total %
statement 59 59 100.0
branch 11 12 91.6
condition 9 15 60.0
subroutine 11 11 100.0
pod 1 1 100.0
total 91 98 92.8


line stmt bran cond sub pod time code
1             package Bio::MLST::OutputFasta;
2             # ABSTRACT: Take in two hashes, both containing sequence names and sequences and output fasta files.
3             $Bio::MLST::OutputFasta::VERSION = '2.1.1630910';
4              
5              
6 11     11   145949 use Moose;
  11         449571  
  11         108  
7 11     11   70225 use File::Basename;
  11         68  
  11         1046  
8 11     11   66 use File::Path qw(make_path);
  11         17  
  11         581  
9 11     11   1104 use Bio::PrimarySeq;
  11         69750  
  11         286  
10 11     11   1100 use Bio::SeqIO;
  11         38855  
  11         371  
11 11     11   537 use Bio::MLST::Types;
  11         18  
  11         7943  
12              
13             has 'matching_sequences' => ( is => 'ro', isa => 'Maybe[HashRef]', required => 1 );
14             has 'non_matching_sequences' => ( is => 'ro', isa => 'Maybe[HashRef]', required => 1 );
15             has 'output_directory' => ( is => 'ro', isa => 'Str', required => 1 );
16             has 'input_fasta_file' => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
17              
18             has '_fasta_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__fasta_filename' );
19             has 'concat_sequence' => ( is => 'rw', isa => 'Maybe[Str]' );
20              
21              
22             sub _build__fasta_filename
23             {
24 5     5   8 my($self) = @_;
25 5         107 my $filename = fileparse($self->input_fasta_file, qr/\.[^.]*$/);
26 5         113 return $filename;
27             }
28              
29             sub _sort_and_join_sequences
30             {
31 4     4   5 my($self, $combined_sequences) = @_;
32 4         7 my @allele_names = sort keys %{$combined_sequences};
  4         19  
33 4         9 my @sorted_sequences = map { $combined_sequences->{$_} } @allele_names;
  14         22  
34 4         15 join("", @sorted_sequences);
35             }
36              
37             sub create_files
38             {
39 5     5 1 9 my($self) = @_;
40 5         116 make_path($self->output_directory);
41 5         128 $self->_fasta_filename;
42 5 100 66     109 if((defined($self->matching_sequences) && %{$self->matching_sequences}) ||(defined($self->non_matching_sequences) && %{$self->non_matching_sequences}) )
  5   33     98  
  1   66     21  
43             {
44              
45 4         6 my %matching_sequences = %{$self->matching_sequences};
  4         78  
46 4         11 my %combined_sequences = (%matching_sequences);
47            
48 4 100 66     86 if(defined($self->non_matching_sequences) && %{$self->non_matching_sequences})
  4         83  
49             {
50 3         5 my %non_matching_sequences = %{$self->non_matching_sequences};
  3         62  
51 3         13 %combined_sequences = (%matching_sequences, %non_matching_sequences);
52             }
53 4         15 my $concat_sequence = $self->_sort_and_join_sequences(\%combined_sequences);
54            
55 4         93 $self->concat_sequence($concat_sequence);
56             }
57            
58 5 100 66     105 if(defined($self->non_matching_sequences) && %{$self->non_matching_sequences})
  5         101  
59             {
60             # create 1 FASTA file for each unknown allele with a close match to another allele
61 3         4 for my $sequence_name (keys %{$self->non_matching_sequences})
  3         61  
62             {
63 6 50       1242 next if(length($self->non_matching_sequences->{$sequence_name}) < 2);
64 6 100       141 next if($self->_does_sequence_contain_all_unknowns($self->non_matching_sequences->{$sequence_name}));
65 5         121 my $non_matching_output_filename = join('/',($self->output_directory, $self->_fasta_filename.'.unknown_allele.'.$sequence_name.'.fa'));
66 5         44 my $out = Bio::SeqIO->new(-file => "+>$non_matching_output_filename" , '-format' => 'Fasta');
67 5         17144 $out->write_seq(Bio::PrimarySeq->new(-seq => $self->non_matching_sequences->{$sequence_name}, -id => $sequence_name));
68             }
69             }
70 5         1196 1;
71             }
72              
73             sub _does_sequence_contain_all_unknowns
74             {
75 6     6   10 my($self, $sequence) = @_;
76 6 100       19 return 1 if($sequence =~ m/^N+$/);
77 5         13 return 0;
78             }
79              
80              
81              
82 11     11   68 no Moose;
  11         18  
  11         70  
83             __PACKAGE__->meta->make_immutable;
84             1;
85              
86             __END__
87              
88             =pod
89              
90             =encoding UTF-8
91              
92             =head1 NAME
93              
94             Bio::MLST::OutputFasta - Take in two hashes, both containing sequence names and sequences and output fasta files.
95              
96             =head1 VERSION
97              
98             version 2.1.1630910
99              
100             =head1 SYNOPSIS
101              
102             Take in two hashes, each mapping sequence names to sequences, and output FASTA files.
103              
104             use Bio::MLST::OutputFasta;
105            
106             my $output_fasta = Bio::MLST::OutputFasta->new(
107             matching_sequences => \%matching_sequences,
108             non_matching_sequences => \%non_matching_sequences,
109             output_directory => '/path/to/output',
110             input_fasta_file => '/path/to/fasta'
111             );
112             $output_fasta->create_files();
113              
114             =head1 METHODS
115              
116             =head2 create_files
117              
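118             Create the output directory, store the name-sorted concatenation of the matching and non-matching sequences in C<concat_sequence>, and write one FASTA file per non-matching sequence, named C<< <input basename>.unknown_allele.<sequence name>.fa >>. Sequences shorter than 2 characters or consisting only of C<N> are skipped.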
119              
120             =head1 AUTHOR
121              
122             Andrew J. Page <ap13@sanger.ac.uk>
123              
124             =head1 COPYRIGHT AND LICENSE
125              
126             This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute.
127              
128             This is free software, licensed under:
129              
130             The GNU General Public License, Version 3, June 2007
131              
132             =cut