File Coverage

lib/Bio/MLST/OutputFasta.pm
Criterion Covered Total %
statement 59 59 100.0
branch 11 12 91.6
condition 9 15 60.0
subroutine 11 11 100.0
pod 1 1 100.0
total 91 98 92.8


line stmt bran cond sub pod time code
1             package Bio::MLST::OutputFasta;
2             # ABSTRACT: Take in two hashes, both containing sequence names and sequences and output fasta files.
3             $Bio::MLST::OutputFasta::VERSION = '2.1.1706216';
4              
5              
6 10     10   103597 use Moose;
  10         311630  
  10         68  
7 10     10   45327 use File::Basename;
  10         15  
  10         715  
8 10     10   46 use File::Path qw(make_path);
  10         12  
  10         405  
9 10     10   823 use Bio::PrimarySeq;
  10         39284  
  10         184  
10 10     10   516 use Bio::SeqIO;
  10         20220  
  10         178  
11 10     10   355 use Bio::MLST::Types;
  10         13  
  10         4494  
12              
               # Constructor inputs. Both sequence attributes accept a hash reference or
               # undef (Maybe[HashRef]); per the module abstract they hold sequence names
               # and sequences.
13             has 'matching_sequences' => ( is => 'ro', isa => 'Maybe[HashRef]', required => 1 );
14             has 'non_matching_sequences' => ( is => 'ro', isa => 'Maybe[HashRef]', required => 1 );
               # Directory passed to make_path() and used as the prefix of every output path.
15             has 'output_directory' => ( is => 'ro', isa => 'Str', required => 1 );
               # Input FASTA path; its base name with the final extension removed becomes
               # _fasta_filename.
               # NOTE(review): 'Bio::MLST::File' is presumably declared in Bio::MLST::Types -- confirm.
16             has 'input_fasta_file' => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
17              
               # Derived/working state: _fasta_filename is built lazily on first access;
               # concat_sequence is writable and is filled in by create_files().
18             has '_fasta_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__fasta_filename' );
19             has 'concat_sequence' => ( is => 'rw', isa => 'Maybe[Str]' );
20              
21              
               # Builder for the lazy '_fasta_filename' attribute.
               # Returns the base name of input_fasta_file: the directory part is dropped
               # and the suffix pattern removes the last '.' together with everything
               # after it (e.g. 'contigs.fa' -> 'contigs'). A name without a dot is
               # returned unchanged because the suffix pattern does not match.
22             sub _build__fasta_filename
23             {
24 5     5   5 my($self) = @_;
               # Scalar assignment: only the base name from fileparse() is kept.
25 5         88 my $filename = fileparse($self->input_fasta_file, qr/\.[^.]*$/);
26 5         150 return $filename;
27             }
28              
               # Concatenate the values of a hash in a deterministic order.
               #   $combined_sequences - hash reference of name => sequence
               # Keys are sorted with Perl's default (string) sort, the matching values are
               # collected in that order and joined with no separator.
               # Returns the joined string; it is the value of the final expression, so
               # there is no explicit 'return'. An empty hash gives the empty string.
29             sub _sort_and_join_sequences
30             {
31 4     4   4 my($self, $combined_sequences) = @_;
32 4         4 my @allele_names = sort keys %{$combined_sequences};
  4         24  
33 4         7 my @sorted_sequences = map { $combined_sequences->{$_} } @allele_names;
  14         15  
34 4         11 join("", @sorted_sequences);
35             }
36              
               # Public entry point: build the concatenated sequence and write one FASTA
               # file per usable non-matching sequence.
               #
               # Steps, in order:
               #   1. Create output_directory (make_path).
               #   2. Touch _fasta_filename so the lazy builder runs.
               #   3. If either sequence hash is defined and non-empty, merge them, join the
               #      sequences in sorted-key order and store the result in concat_sequence.
               #   4. For every non-matching sequence that is at least 2 characters long and
               #      is not made up solely of 'N', write
               #      <output_directory>/<_fasta_filename>.unknown_allele.<name>.fa
               # Always returns 1. concat_sequence is left untouched when both hashes are
               # undef or empty.
37             sub create_files
38             {
39 5     5 1 6 my($self) = @_;
40 5         95 make_path($self->output_directory);
               # Called in void context; the value is discarded, the call only forces the
               # lazy attribute to be built.
41 5         105 $self->_fasta_filename;
42 5 100 66     153 if((defined($self->matching_sequences) && %{$self->matching_sequences}) ||(defined($self->non_matching_sequences) && %{$self->non_matching_sequences}) )
  5   33     81  
  1   66     17  
43             {
44              
               # NOTE(review): this branch is also entered when matching_sequences is undef
               # and non_matching_sequences is non-empty, yet the dereference below is
               # unconditional. The attribute type allows undef, so that combination looks
               # like it would die with "Can't use an undefined value as a HASH reference"
               # under strict refs -- confirm and guard in the real module source.
45 4         4 my %matching_sequences = %{$self->matching_sequences};
  4         65  
46 4         9 my %combined_sequences = (%matching_sequences);
47            
48 4 100 66     71 if(defined($self->non_matching_sequences) && %{$self->non_matching_sequences})
  4         65  
49             {
50 3         4 my %non_matching_sequences = %{$self->non_matching_sequences};
  3         67  
               # Non-matching entries come last in the list, so they win when a name is
               # present in both hashes.
51 3         9 %combined_sequences = (%matching_sequences, %non_matching_sequences);
52             }
53 4         13 my $concat_sequence = $self->_sort_and_join_sequences(\%combined_sequences);
54            
55 4         75 $self->concat_sequence($concat_sequence);
56             }
57            
58 5 100 66     99 if(defined($self->non_matching_sequences) && %{$self->non_matching_sequences})
  5         90  
59             {
60             # create 1 FASTA file for each unknown allele with a close match to another allele
61 3         3 for my $sequence_name (keys %{$self->non_matching_sequences})
  3         52  
62             {
               # Skip sequences shorter than two characters and sequences made up
               # entirely of 'N'; no file is produced for them.
63 6 50       1300 next if(length($self->non_matching_sequences->{$sequence_name}) < 2);
64 6 100       124 next if($self->_does_sequence_contain_all_unknowns($self->non_matching_sequences->{$sequence_name}));
65 5         111 my $non_matching_output_filename = join('/',($self->output_directory, $self->_fasta_filename.'.unknown_allele.'.$sequence_name.'.fa'));
               # NOTE(review): the '+>' prefix is presumably passed through as the open
               # mode (read/write, truncating an existing file) -- confirm against Bio::SeqIO.
66 5         41 my $out = Bio::SeqIO->new(-file => "+>$non_matching_output_filename" , '-format' => 'Fasta');
67 5         14778 $out->write_seq(Bio::PrimarySeq->new(-seq => $self->non_matching_sequences->{$sequence_name}, -id => $sequence_name));
68             }
69             }
70 5         609 1;
71             }
72              
               # Predicate: is the sequence nothing but unknown bases?
               #   $sequence - sequence string to test
               # Returns 1 when the string is one or more upper-case 'N' characters and
               # nothing else, otherwise 0.
               # NOTE(review): '$' also matches before a single trailing newline, and the
               # match is case-sensitive, so "NNN\n" counts as all-unknown while "nnn" does
               # not -- confirm that is intended.
73             sub _does_sequence_contain_all_unknowns
74             {
75 6     6   10 my($self, $sequence) = @_;
76 6 100       14 return 1 if($sequence =~ m/^N+$/);
77 5         13 return 0;
78             }
79              
80              
81              
               # Package epilogue: unimport the Moose keywords now that the class is
               # declared, make the metaclass immutable, and end the file with a true value.
82 10     10   49 no Moose;
  10         10  
  10         57  
83             __PACKAGE__->meta->make_immutable;
84             1;
85              
86             __END__
87              
88             =pod
89              
90             =encoding UTF-8
91              
92             =head1 NAME
93              
94             Bio::MLST::OutputFasta - Take in two hashes, both containing sequence names and sequences and output fasta files.
95              
96             =head1 VERSION
97              
98             version 2.1.1706216
99              
100             =head1 SYNOPSIS
101              
102             Take in two hashes, both containing sequence names and sequences and output fasta files.
103              
104             use Bio::MLST::OutputFasta;
105            
106             my $output_fasta = Bio::MLST::OutputFasta->new(
107             matching_sequences => \%matching_sequences,
108             non_matching_sequences => \%non_matching_sequences,
109             output_directory => '/path/to/output',
110             input_fasta_file => '/path/to/fasta'
111             );
112             $output_fasta->create_files();
113              
114             =head1 METHODS
115              
116             =head2 create_files
117              
118             Create output fasta files.
119              
120             =head1 AUTHOR
121              
122             Andrew J. Page <ap13@sanger.ac.uk>
123              
124             =head1 COPYRIGHT AND LICENSE
125              
126             This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute.
127              
128             This is free software, licensed under:
129              
130             The GNU General Public License, Version 3, June 2007
131              
132             =cut