File Coverage

lib/Bio/Roary/GeneNamesFromGFF.pm
Criterion Covered Total %
statement 37 38 97.3
branch 12 18 66.6
condition n/a
subroutine 5 5 100.0
pod n/a
total 54 61 88.5


line stmt bran cond sub pod time code
1             package Bio::Roary::GeneNamesFromGFF;
2             $Bio::Roary::GeneNamesFromGFF::VERSION = '3.10.2';
3             # ABSTRACT: Parse a GFF and efficiently extract ID -> Gene Name
4              
5              
6 18     18   82111 use Moose;
  18         380045  
  18         98  
7              
8 18     18   107743 use Bio::Tools::GFF;
  18         1052253  
  18         5907  
9             with 'Bio::Roary::ParseGFFAnnotationRole';
10              
11             has 'ids_to_gene_name' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_ids_to_gene_name' );
12             has 'ids_to_product' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
13             has 'ids_to_gene_size' => ( is => 'rw', isa => 'HashRef', default => sub { {} } );
14              
15             # Parsing with the perl GFF module is exceptionally slow.
16             sub _build_ids_to_gene_name {
17 42     42   100 my ($self) = @_;
18 42         76 my %id_to_gene_name;
19              
20 42         997 my $gffio = Bio::Tools::GFF->new( -file => $self->gff_file, -gff_version => 3 );
21 42         31412 while ( my $feature = $gffio->next_feature() ) {
22 641         306930 my $gene_id = $self->_get_feature_id($feature);
23 641 50       1239 next unless ($gene_id);
24              
25 641 100       1430 if ( $feature->has_tag('gene') ) {
    100          
26 196         1051 my ( $gene_name, @junk ) = $feature->get_tag_values('gene');
27 196         1704 $gene_name =~ s!"!!g;
28 196 50       430 if ( $gene_name ne "" ) {
29 196         568 $id_to_gene_name{$gene_id} = $gene_name;
30             }
31             }
32             elsif ( $feature->has_tag('Name') ) {
33 8         63 my ( $gene_name, @junk ) = $feature->get_tag_values('Name');
34 8         92 $gene_name =~ s!"!!g;
35 8 50       14 if ( $gene_name ne "" ) {
36 8         17 $id_to_gene_name{$gene_id} = $gene_name;
37             }
38             }
39            
40 641 50       4330 if ( $feature->has_tag('product') ) {
41 641         3146 my ( $product, @junk ) = $feature->get_tag_values('product');
42 641         20726 $self->ids_to_product->{$gene_id} = $product;
43             }
44 641         1609 $self->ids_to_gene_size->{$gene_id} = $feature->end - $feature->start;
45             }
46              
47 42         261325 return \%id_to_gene_name;
48             }
49              
50             sub _get_feature_id {
51 641     641   1079 my ( $self, $feature ) = @_;
52 641         789 my ( $gene_id, @junk );
53 641 100       1259 if ( $feature->has_tag('ID') ) {
    50          
54 625         3540 ( $gene_id, @junk ) = $feature->get_tag_values('ID');
55             }
56             elsif ( $feature->has_tag('locus_tag') ) {
57 16         138 ( $gene_id, @junk ) = $feature->get_tag_values('locus_tag');
58             }
59             else {
60 0         0 return undef;
61             }
62 641         5705 $gene_id =~ s!["']!!g;
63 641 50       1250 return undef if ( $gene_id eq "" );
64 641         1209 return $gene_id;
65             }
66              
67 18     18   174 no Moose;
  18         69  
  18         192  
68             __PACKAGE__->meta->make_immutable;
69              
70             1;
71              
72             __END__
73              
74             =pod
75              
76             =encoding UTF-8
77              
78             =head1 NAME
79              
80             Bio::Roary::GeneNamesFromGFF - Parse a GFF and efficiently extract ID -> Gene Name
81              
82             =head1 VERSION
83              
84             version 3.10.2
85              
86             =head1 SYNOPSIS
87              
88             Parse a GFF and efficiently extract ID -> Gene Name
89             use Bio::Roary::GeneNamesFromGFF;
90              
91             my $obj = Bio::Roary::GeneNamesFromGFF->new(
92             gff_file => 'abc.gff'
93             );
94             $obj->ids_to_gene_name;
95              
96             =head1 AUTHOR
97              
98             Andrew J. Page <ap13@sanger.ac.uk>
99              
100             =head1 COPYRIGHT AND LICENSE
101              
102             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
103              
104             This is free software, licensed under:
105              
106             The GNU General Public License, Version 3, June 2007
107              
108             =cut