File Coverage

lib/Bio/Roary/GeneNamesFromGFF.pm
Criterion Covered Total %
statement 37 38 97.3
branch 12 18 66.6
condition n/a
subroutine 5 5 100.0
pod n/a
total 54 61 88.5


line stmt bran cond sub pod time code
1             package Bio::Roary::GeneNamesFromGFF;
2             $Bio::Roary::GeneNamesFromGFF::VERSION = '3.11.0';
3             # ABSTRACT: Parse a GFF and efficiently extract ID -> Gene Name
4              
5              
6 18     18   90988 use Moose;
  18         395386  
  18         99  
7              
8 18     18   114886 use Bio::Tools::GFF;
  18         1112223  
  18         6243  
9             with 'Bio::Roary::ParseGFFAnnotationRole';
10              
11             has 'ids_to_gene_name' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_ids_to_gene_name' );
12             has 'ids_to_product' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
13             has 'ids_to_gene_size' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
14              
15             # Parsing with the perl GFF module is exceptionally slow.
16             sub _build_ids_to_gene_name {
17 42     42   96 my ($self) = @_;
18 42         68 my %id_to_gene_name;
19              
20 42         967 my $gffio = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );
21 42         31365 while ( my $feature = $gffio->next_feature() ) {
22 641         299282 my $gene_id = $self->_get_feature_id($feature);
23 641 50       1002 next unless ($gene_id);
24              
25 641 100       1149 if ( $feature->has_tag('gene') ) {
    100          
26 196         971 my ( $gene_name, @junk ) = $feature->get_tag_values('gene');
27 196         1545 $gene_name =~ s!"!!g;
28 196 50       422 if ( $gene_name ne "" ) {
29 196         541 $id_to_gene_name{$gene_id} = $gene_name;
30             }
31             }
32             elsif ( $feature->has_tag('Name') ) {
33 8         69 my ( $gene_name, @junk ) = $feature->get_tag_values('Name');
34 8         63 $gene_name =~ s!"!!g;
35 8 50       15 if ( $gene_name ne "" ) {
36 8         20 $id_to_gene_name{$gene_id} = $gene_name;
37             }
38             }
39            
40 641 50       4468 if ( $feature->has_tag('product') ) {
41 641         3095 my ( $product, @junk ) = $feature->get_tag_values('product');
42 641         20175 $self->ids_to_product->{$gene_id} = $product;
43             }
44 641         1553 $self->ids_to_gene_size->{$gene_id} = $feature->end - $feature->start;
45             }
46              
47 42         250123 return \%id_to_gene_name;
48             }
49              
50             sub _get_feature_id {
51 641     641   1002 my ( $self, $feature ) = @_;
52 641         771 my ( $gene_id, @junk );
53 641 100       1194 if ( $feature->has_tag('ID') ) {
    50          
54 625         3414 ( $gene_id, @junk ) = $feature->get_tag_values('ID');
55             }
56             elsif ( $feature->has_tag('locus_tag') ) {
57 16         126 ( $gene_id, @junk ) = $feature->get_tag_values('locus_tag');
58             }
59             else {
60 0         0 return undef;
61             }
62 641         5586 $gene_id =~ s!["']!!g;
63 641 50       1196 return undef if ( $gene_id eq "" );
64 641         1111 return $gene_id;
65             }
66              
67 18     18   191 no Moose;
  18         43  
  18         169  
68             __PACKAGE__->meta->make_immutable;
69              
70             1;
71              
72             __END__
73              
74             =pod
75              
76             =encoding UTF-8
77              
78             =head1 NAME
79              
80             Bio::Roary::GeneNamesFromGFF - Parse a GFF and efficiently extract ID -> Gene Name
81              
82             =head1 VERSION
83              
84             version 3.11.0
85              
86             =head1 SYNOPSIS
87              
88             Parse a GFF and efficiently extract ID -> Gene Name
89             use Bio::Roary::GeneNamesFromGFF;
90              
91             my $obj = Bio::Roary::GeneNamesFromGFF->new(
92             gff_file => 'abc.gff'
93             );
94             $obj->ids_to_gene_name;
95              
96             =head1 AUTHOR
97              
98             Andrew J. Page <ap13@sanger.ac.uk>
99              
100             =head1 COPYRIGHT AND LICENSE
101              
102             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
103              
104             This is free software, licensed under:
105              
106             The GNU General Public License, Version 3, June 2007
107              
108             =cut