File Coverage

lib/Bio/MLST/NormaliseFasta.pm
Criterion Covered Total %
statement 34 34 100.0
branch 2 2 100.0
condition n/a
subroutine 8 8 100.0
pod 1 1 100.0
total 45 45 100.0


line stmt bran cond sub pod time code
1             package Bio::MLST::NormaliseFasta;
2             # ABSTRACT: Take in a Fasta file, check for invalid characters and build a corrected file if needed.
3             $Bio::MLST::NormaliseFasta::VERSION = '2.1.1630910';
4              
5 11     11   246672 use Moose;
  11         497858  
  11         85  
6 11     11   71790 use Bio::SeqIO;
  11         20  
  11         293  
7 11     11   52 use File::Basename;
  11         19  
  11         914  
8 11     11   570 use Bio::MLST::Types;
  11         24  
  11         4343  
9              
10             has 'fasta_filename' => ( is => 'ro', isa => 'Bio::MLST::File', required => 1 );
11             has 'working_directory' => ( is => 'ro', isa => 'Str', required => 1 );
12              
13             has '_normalised_fasta_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__normalised_fasta_filename' );
14              
15             sub _build__normalised_fasta_filename
16             {
17 6     6   42 my($self) = @_;
18 6         346 my $fasta_obj = Bio::SeqIO->new( -file => $self->fasta_filename , -format => 'Fasta');
19            
20 6         90951 while(my $seq = $fasta_obj->next_seq())
21             {
22 7 100       3001 if($seq->id =~ m/\|/ )
23             {
24 1         23 return $self->_rename_sequences();
25             }
26             }
27            
28 5         1040 return $self->fasta_filename;
29             }
30              
31             sub _rename_sequences
32             {
33 1     1   4 my($self) = @_;
34 1         52 my $in_fasta_obj = Bio::SeqIO->new( -file => $self->fasta_filename , -format => 'Fasta');
35 1         770 my($filename, $directories, $suffix) = fileparse($self->fasta_filename);
36 1         47 my $output_filename = $self->working_directory.'/'.$filename.$suffix ;
37 1         14 my $out_fasta_obj = Bio::SeqIO->new(-file => "+>".$output_filename , -format => 'Fasta');
38            
39 1         788 my $counter = 1;
40 1         5 while(my $seq = $in_fasta_obj->next_seq())
41             {
42 3         606 $seq->id($counter."");
43 3         54 $out_fasta_obj->write_seq($seq);
44 3         720 $counter++;
45             }
46 1         77 return $output_filename;
47             }
48              
49             sub processed_fasta_filename
50             {
51 6     6 1 16 my($self) = @_;
52 6         391 return $self->_normalised_fasta_filename;
53             }
54              
55 11     11   77 no Moose;
  11         21  
  11         92  
56             __PACKAGE__->meta->make_immutable;
57             1;
58              
59             __END__
60              
61             =pod
62              
63             =encoding UTF-8
64              
65             =head1 NAME
66              
67             Bio::MLST::NormaliseFasta - Take in a Fasta file, check for invalid characters and build a corrected file if needed.
68              
69             =head1 VERSION
70              
71             version 2.1.1630910
72              
73             =head1 SYNOPSIS
74              
75             Take in a Fasta file, check for invalid characters and build a corrected file if needed.
76             This is needed for NCBI makeblastdb, which does not accept the pipe character in sequence names.
77              
78             use Bio::MLST::NormaliseFasta;
79            
80             my $output_fasta = Bio::MLST::NormaliseFasta->new(
81             fasta_filename    => 'Filename.fasta',
82             working_directory => '/path/to/working_dir'
83             );
84             $output_fasta->processed_fasta_filename();
85              
86             =head1 METHODS
87              
88             =head2 processed_fasta_filename
89              
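90             Returns the name of a Fasta file that is safe to use: the original file when no sequence ID contains a pipe character, otherwise a copy written to the working directory with the sequences renamed to sequential numbers.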
91              
92             =head1 AUTHOR
93              
94             Andrew J. Page <ap13@sanger.ac.uk>
95              
96             =head1 COPYRIGHT AND LICENSE
97              
98             This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute.
99              
100             This is free software, licensed under:
101              
102             The GNU General Public License, Version 3, June 2007
103              
104             =cut