File Coverage

lib/Bio/Roary/ClustersRole.pm
Criterion Covered Total %
statement 27 27 100.0
branch 8 8 100.0
condition n/a
subroutine 4 4 100.0
pod n/a
total 39 39 100.0


line stmt bran cond sub pod time code
1             package Bio::Roary::ClustersRole;
2             # ABSTRACT: A role to read a clusters file from CD-HIT
3             $Bio::Roary::ClustersRole::VERSION = '3.10.2';
4              
5 5     5   2803 use Moose::Role;
  5         12471  
  5         25  
6 5     5   24705 use Bio::Roary::Exceptions;
  5         9  
  5         1627  
7              
8             has 'clusters_filename' => ( is => 'ro', isa => 'Str', required => 1 );
9             has '_clustered_genes' => ( is => 'ro',lazy => 1, builder => '_build__clustered_genes' );
10             has '_clusters_fh' => ( is => 'ro',lazy => 1, builder => '_build__clusters_fh' );
11              
12             sub _build__clusters_fh
13             {
14 4     4   10 my($self) = @_;
15 4 100       116 open(my $fh, $self->clusters_filename) or Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $self->clusters_filename );
16 3         61 return $fh;
17             }
18              
19             sub _build__clustered_genes
20             {
21 4     4   10 my($self) = @_;
22 4         78 my $fh = $self->_clusters_fh;
23 3         7 my %clustered_genes ;
24              
25             my %raw_clusters;
26 3         0 my $current_cluster_name;
27 3         35 while(<$fh>)
28             {
29 113         122 my $line = $_;
30 113 100       171 if($line =~ /^>(.+)$/)
31             {
32 31         42 $current_cluster_name = $1;
33             }
34            
35             #>Cluster 5
36             #0 4201aa, >6630_4#9_00008... *
37             #1 4201aa, >6631_1#23_00379... at 100.00%
38            
39 113 100       279 if($line =~ /[\d]+\t[\w]+, >(.+)\.\.\. (.+)$/)
40             {
41 82         115 my $gene_name = $1;
42 82         78 my $identity = $2;
43            
44 82 100       97 if($identity eq '*')
45             {
46 31         89 $raw_clusters{$current_cluster_name}{representative_gene_name} = $gene_name;
47             }
48             else
49             {
50 51         41 push(@{$raw_clusters{$current_cluster_name}{gene_names}}, $gene_name);
  51         163  
51             }
52             }
53             }
54            
55             # iterate over the raw clusters and convert to a simple hash
56 3         12 for my $cluster_name (keys %raw_clusters)
57             {
58 31         57 $clustered_genes{$raw_clusters{$cluster_name}{representative_gene_name}} = $raw_clusters{$cluster_name}{gene_names};
59             }
60            
61 3         76 return \%clustered_genes;
62             }
63              
64             1;
65              
66             __END__
67              
68             =pod
69              
70             =encoding UTF-8
71              
72             =head1 NAME
73              
74             Bio::Roary::ClustersRole - A role to read a clusters file from CD-HIT
75              
76             =head1 VERSION
77              
78             version 3.10.2
79              
80             =head1 SYNOPSIS
81              
82             A role to read a clusters file produced by CD-HIT. Consume it in a Moose class:
83             with 'Bio::Roary::ClustersRole';
84              
85             =head1 AUTHOR
86              
87             Andrew J. Page <ap13@sanger.ac.uk>
88              
89             =head1 COPYRIGHT AND LICENSE
90              
91             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
92              
93             This is free software, licensed under:
94              
95             The GNU General Public License, Version 3, June 2007
96              
97             =cut