File Coverage

lib/Bio/MLST/CDC/Convert.pm
Criterion Covered Total %
statement 60 60 100.0
branch 1 2 50.0
condition n/a
subroutine 14 14 100.0
pod 1 1 100.0
total 76 77 98.7


line stmt bran cond sub pod time code
1             package Bio::MLST::CDC::Convert;
2             # ABSTRACT: Take in a fasta file of emmST sequences and convert it into an MLST format
3             $Bio::MLST::CDC::Convert::VERSION = '2.1.1706216';
4              
5              
6 2     2   1436 use Moose;
  2         2  
  2         14  
7 2     2   9014 use File::Basename;
  2         2  
  2         142  
8 2     2   8 use File::Path qw(make_path);
  2         2  
  2         72  
9 2     2   8 use Bio::PrimarySeq;
  2         4  
  2         32  
10 2     2   8 use Bio::SeqIO;
  2         2  
  2         26  
11 2     2   8 use Bio::MLST::Types;
  2         0  
  2         28  
12 2     2   6 use Text::CSV;
  2         4  
  2         960  
13              
14             with 'Bio::MLST::Download::Downloadable';
15              
16             has 'species' => ( is => 'ro', isa => 'Str', required => 1 );
17             has 'input_file' => ( is => 'ro', isa => 'Str', required => 1 );
18             has 'gene_name' => ( is => 'ro', isa => 'Str', required => 1 );
19             has 'base_directory' => ( is => 'ro', isa => 'Str', required => 1 );
20              
21             has 'destination_directory' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_destination_directory' );
22             has '_output_allele_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__output_allele_filename' );
23             has '_output_profile_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__output_profile_filename' );
24              
25             sub _build__output_allele_filename
26             {
27 2     2   2 my ($self) = @_;
28 2         48 join('/',($self->destination_directory, 'alleles',$self->gene_name.'.tfa'));
29             }
30              
31             sub _build__output_profile_filename
32             {
33 2     2   4 my ($self) = @_;
34 2         40 join('/',($self->destination_directory, 'profiles',$self->_sub_directory.'.txt'));
35             }
36              
37             sub _build_destination_directory
38             {
39 2     2   4 my ($self) = @_;
40 2         38 my $destination_directory = join('/',($self->base_directory,$self->_sub_directory));
41 2         312 make_path($destination_directory);
42 2         206 make_path(join('/',($destination_directory,'alleles')));
43 2         216 make_path(join('/',($destination_directory,'profiles')));
44 2         52 return $destination_directory;
45             }
46              
47             sub _sub_directory
48             {
49 4     4   4 my ($self) = @_;
50 4         68 my $combined_name = join('_',($self->species));
51 4         8 $combined_name =~ s!\.$!!gi;
52 4         16 $combined_name =~ s!\W!_!gi;
53 4         44 return $combined_name;
54             }
55              
56              
57             sub create_mlst_files
58             {
59 2     2 1 4 my ($self) = @_;
60            
61 2         54 $self->_download_file($self->input_file,$self->destination_directory);
62            
63 2         56 my $fasta_obj = Bio::SeqIO->new( -file => join('/',($self->destination_directory, $self->_get_filename_from_url($self->input_file))) , -format => 'Fasta');
64 2         5820 my $out_fasta_obj = Bio::SeqIO->new(-file => "+>".$self->_output_allele_filename , -format => 'Fasta');
65            
66 2         838 my @sequence_names;
67 2         6 my $counter = 1;
68 2         8 while(my $seq = $fasta_obj->next_seq())
69             {
70 14         1846 my $normalised_name = $self->gene_name."-".$counter;
71 14         42 push(@sequence_names,[$seq->id,$counter]);
72 14         148 $seq->id($normalised_name);
73              
74 14         96 $out_fasta_obj->write_seq($seq);
75 14         1594 $counter++;
76             }
77            
78 2         62 $self->_create_profile(\@sequence_names);
79 2         92 return $self;
80             }
81              
82             sub _create_profile
83             {
84 2     2   4 my ($self,$sequence_names) = @_;
85 2 50       56 open(my $profile, '+>', $self->_output_profile_filename ) or die 'Couldnt open output profile file';
86              
87 2         24 my $csv_out = Text::CSV->new({binary=>1, always_quote=>1, sep_char=>"\t", eol=>"\n"});
88 2         312 $csv_out->print($profile,['ST',$self->gene_name]);
89            
90            
91 2         28 for my $sequence_type_details (@{$sequence_names})
  2         4  
92             {
93 14         56 $csv_out->print($profile,$sequence_type_details);
94             }
95             }
96              
97 2     2   10 no Moose;
  2         2  
  2         8  
98             __PACKAGE__->meta->make_immutable;
99             1;
100              
101             __END__
102              
103             =pod
104              
105             =encoding UTF-8
106              
107             =head1 NAME
108              
109             Bio::MLST::CDC::Convert - Take in a fasta file of emmST sequences and convert it into an MLST format
110              
111             =head1 VERSION
112              
113             version 2.1.1706216
114              
115             =head1 SYNOPSIS
116              
117             Take in a fasta file of emmST sequences and convert it into an MLST format, producing an allele file and a profile.
118              
119             use Bio::MLST::CDC::Convert;
120            
121             my $convert_fasta = Bio::MLST::CDC::Convert->new(
122             species => 'Streptococcus pyogenes emmST',
123             input_file => 't/data/CDC_emmST_partial.tfa',
124             gene_name => 'emmST',
125             base_directory => '/path/to/output/dir'
126             );
127             $convert_fasta->create_mlst_files();
128              
129             =head1 METHODS
130              
131             =head2 create_mlst_files
132              
133             Create an allele file and a profile, in the MLST directory structure.
134              
135             =head1 AUTHOR
136              
137             Andrew J. Page <ap13@sanger.ac.uk>
138              
139             =head1 COPYRIGHT AND LICENSE
140              
141             This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute.
142              
143             This is free software, licensed under:
144              
145             The GNU General Public License, Version 3, June 2007
146              
147             =cut