File Coverage

lib/Bio/Roary/UniqueGenesPerSample.pm
Criterion Covered Total %
statement 35 36 97.2
branch 7 12 58.3
condition n/a
subroutine 6 6 100.0
pod 0 1 0.0
total 48 55 87.2


line stmt bran cond sub pod time code
1             package Bio::Roary::UniqueGenesPerSample;
2             $Bio::Roary::UniqueGenesPerSample::VERSION = '3.10.1';
3             # ABSTRACT: Take in the clustered file and produce a sorted file with the frequency of each sample's unique genes
4              
5              
6 1     1   88680 use Moose;
  1         382890  
  1         6  
7 1     1   6551 use Bio::Roary::Exceptions;
  1         2  
  1         356  
8              
9             has 'clustered_proteins' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
10             has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'unique_genes_per_sample.tsv' );
11              
12             has '_output_fh' => ( is => 'ro', lazy => 1, builder => '_build__output_fh' );
13              
14             sub _build__output_fh {
15 1     1   2 my ($self) = @_;
16 1 50       24 open( my $fh, '>', $self->output_filename )
17             or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => "Couldnt write output file:" . $self->output_filename );
18 1         27 return $fh;
19             }
20              
21             #group_17585: 14520_6#21_00645
22             sub _sample_to_gene_freq {
23 2     2   3 my ($self) = @_;
24              
25 2 50       70 open( my $input_fh, $self->clustered_proteins )
26             or Bio::Roary::Exceptions::FileNotFound->throw( error => "Couldnt read input file:" . $self->clustered_proteins );
27              
28 2         6 my %sample_to_gene_freq;
29 2         15 while (<$input_fh>) {
30 12         16 chomp;
31 12         14 my $line = $_;
32 12 50       17 next if ( length( $line ) < 6 );
33 12 100       35 if ( $line =~ /^.+: ([^\s]+)$/ ) {
34 10         16 my $gene_id = $1;
35 10 50       24 if ( $gene_id =~ /^(.+)_[\d]+$/ ) {
36 10         13 my $sample_name = $1;
37 10         28 $sample_to_gene_freq{$sample_name}++;
38             }
39             else {
40             # gene id may not be valid so ignore
41 0         0 next;
42             }
43             }
44             else {
45             # it's either an invalid line or there's more than 1 gene in the cluster
46 2         6 next;
47             }
48             }
49              
50 2         22 return \%sample_to_gene_freq;
51             }
52              
53             sub write_unique_frequency {
54 1     1 0 3 my ($self) = @_;
55              
56 1         2 my %sample_to_gene_freq = %{$self->_sample_to_gene_freq};
  1         2  
57            
58 1 50       6 for my $sample ( sort { $sample_to_gene_freq{$b} <=> $sample_to_gene_freq{$a} || $a cmp $b } keys %sample_to_gene_freq ) {
  4         11  
59 4         8 print { $self->_output_fh } $sample . "\t" . $sample_to_gene_freq{$sample} . "\n";
  4         78  
60             }
61 1         19 close($self->_output_fh);
62 1         6 return 1;
63             }
64              
65 1     1   6 no Moose;
  1         2  
  1         6  
66             __PACKAGE__->meta->make_immutable;
67              
68             1;
69              
70             __END__
71              
72             =pod
73              
74             =encoding UTF-8
75              
76             =head1 NAME
77              
78             Bio::Roary::UniqueGenesPerSample - Take in the clustered file and produce a sorted file with the frequency of each sample's unique genes
79              
80             =head1 VERSION
81              
82             version 3.10.1
83              
84             =head1 SYNOPSIS
85              
86             Take in the clustered file and produce a sorted file with the frequency of each sample's unique genes:
87             use Bio::Roary::UniqueGenesPerSample;
88              
89             my $obj = Bio::Roary::UniqueGenesPerSample->new(
90             clustered_proteins => 'clustered_proteins',
91             output_filename => 'output_filename',
92             );
93             $obj->write_unique_frequency;
94              
95             =head1 AUTHOR
96              
97             Andrew J. Page <ap13@sanger.ac.uk>
98              
99             =head1 COPYRIGHT AND LICENSE
100              
101             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
102              
103             This is free software, licensed under:
104              
105             The GNU General Public License, Version 3, June 2007
106              
107             =cut