File Coverage

lib/Bio/MLST/NormaliseFasta.pm
Criterion Covered Total %
statement 34 34 100.0
branch 2 2 100.0
condition n/a
subroutine 8 8 100.0
pod 1 1 100.0
total 45 45 100.0


line stmt bran cond sub pod time code
1             package Bio::MLST::NormaliseFasta;
2             # ABSTRACT: Take in a Fasta file, check for invalid characters and build a corrected file if needed.
3             $Bio::MLST::NormaliseFasta::VERSION = '2.1.1706216';
4              
5 10     10   143457 use Moose;
  10         307523  
  10         71  
6 10     10   44352 use Bio::SeqIO;
  10         12  
  10         200  
7 10     10   42 use File::Basename;
  10         12  
  10         710  
8 10     10   435 use Bio::MLST::Types;
  10         12  
  10         2752  
9              
10             has 'fasta_filename' => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
11             has 'working_directory' => ( is => 'ro', isa => 'Str', required => 1 );
12              
13             has '_normalised_fasta_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__normalised_fasta_filename' );
14              
15             sub _build__normalised_fasta_filename
16             {
17 6     6   27 my($self) = @_;
18 6         184 my $fasta_obj = Bio::SeqIO->new( -file => $self->fasta_filename , -format => 'Fasta');
19            
20 6         44778 while(my $seq = $fasta_obj->next_seq())
21             {
22 7 100       1589 if($seq->id =~ m/\|/ )
23             {
24 1         11 return $self->_rename_sequences();
25             }
26             }
27            
28 5         638 return $self->fasta_filename;
29             }
30              
31             sub _rename_sequences
32             {
33 1     1   2 my($self) = @_;
34 1         25 my $in_fasta_obj = Bio::SeqIO->new( -file => $self->fasta_filename , -format => 'Fasta');
35 1         370 my($filename, $directories, $suffix) = fileparse($self->fasta_filename);
36 1         22 my $output_filename = $self->working_directory.'/'.$filename.$suffix ;
37 1         7 my $out_fasta_obj = Bio::SeqIO->new(-file => "+>".$output_filename , -format => 'Fasta');
38            
39 1         417 my $counter = 1;
40 1         3 while(my $seq = $in_fasta_obj->next_seq())
41             {
42 3         299 $seq->id($counter."");
43 3         26 $out_fasta_obj->write_seq($seq);
44 3         358 $counter++;
45             }
46 1         29 return $output_filename;
47             }
48              
49             sub processed_fasta_filename
50             {
51 6     6 1 24 my($self) = @_;
52 6         183 return $self->_normalised_fasta_filename;
53             }
54              
55 10     10   45 no Moose;
  10         12  
  10         48  
56             __PACKAGE__->meta->make_immutable;
57             1;
58              
59             __END__
60              
61             =pod
62              
63             =encoding UTF-8
64              
65             =head1 NAME
66              
67             Bio::MLST::NormaliseFasta - Take in a Fasta file, check for invalid characters and build a corrected file if needed.
68              
69             =head1 VERSION
70              
71             version 2.1.1706216
72              
73             =head1 SYNOPSIS
74              
75             Take in a Fasta file, check for invalid characters and build a corrected file if needed.
76             This is needed for NCBI makeblastdb, which doesn't like the pipe character in the sequence name.
77              
78             use Bio::MLST::NormaliseFasta;
79            
80             my $output_fasta = Bio::MLST::NormaliseFasta->new(
81             fasta_filename => 'Filename.fasta',
82             working_directory => '/path/to/working_directory'
83             );
84             $output_fasta->processed_fasta_filename();
85              
86             =head1 METHODS
87              
88             =head2 processed_fasta_filename
89              
90             Output a temporary fasta file that's been cleaned up.
91              
92             =head1 AUTHOR
93              
94             Andrew J. Page <ap13@sanger.ac.uk>
95              
96             =head1 COPYRIGHT AND LICENSE
97              
98             This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute.
99              
100             This is free software, licensed under:
101              
102             The GNU General Public License, Version 3, June 2007
103              
104             =cut