File Coverage

blib/lib/Bio/LITE/Taxonomy/NCBI.pm
Criterion Covered Total %
statement 61 85 71.7
branch 14 32 43.7
condition 3 11 27.2
subroutine 12 17 70.5
pod 4 4 100.0
total 94 149 63.0


line stmt bran cond sub pod time code
1             package Bio::LITE::Taxonomy::NCBI;
2              
3             =head1 NAME
4              
5             Bio::LITE::Taxonomy::NCBI - Lightweight and efficient NCBI taxonomic manager
6              
7             =head1 SYNOPSIS
8              
9             use Bio::LITE::Taxonomy::NCBI;
10              
11             my $taxDB = Bio::LITE::Taxonomy::NCBI->new (
12             db=>"NCBI",
13             names=> "/path/to/names.dmp",
14             nodes=>"/path/to/nodes.dmp"
15             );
16              
17             my $tax = $taxDB->get_taxonomy(1442); # 1442 is a Taxid
18             my $taxid = $taxDB->get_taxid_from_name("Bacteroidetes");
19             my $term = $taxDB->get_term_at_level(1442,"family");
20              
21             my $taxDB2 = Bio::LITE::Taxonomy::NCBI-> new (
22             db=>"NCBI",
23             names=> "/path/to/names.dmp",
24             nodes=>"/path/to/nodes.dmp",
25             dict=>"/path/to/dictionary/file",
26             );
27             my $tax2 = $taxDB2->get_taxonomy_from_gi(12553);
28              
29             # Methods from Bio::LITE::Taxonomy::NCBI::Gi2taxid
30             # can also be called directly:
31              
32             my $taxid2 = $taxDB2->get_taxid(12553);
33              
34              
35             =head1 DESCRIPTION
36              
37             This module provides easy and efficient access to the NCBI taxonomy with minimal dependencies and without intermediary databases.
38              
39             This module is not part of the Bioperl bundle. For Bioperl alternatives see the L</SEE ALSO> section of this document.
40              
41             =head1 CONSTRUCTOR
42              
43             =head2 C<new>
44              
45             Creates a Bio::LITE::Taxonomy::NCBI object.
46              
47             The following parameters are needed
48              
49             =over 4
50              
51             =item names
52              
53             The location of the I<names.dmp> file. Filehandles are also allowed. I<Mandatory>.
54              
55             =item nodes
56              
57             The location of the I<nodes.dmp> file. Filehandles are also allowed. I<Mandatory>.
58              
59             =item dict
60              
61             You can query the tree using C<GI>s directly instead of C<Taxid>s. To do this, you should provide the NCBI GI to Taxid mapper in binary format as explained in L<Bio::LITE::Taxonomy::NCBI::Gi2taxid>. I<Optional>.
62              
63             =item save_mem
64              
65             Use this option to avoid loading the binary dictionary (GI to Taxid) into memory. This will save almost 1GB of system memory but looking up Taxids will be ~20% slower. This parameter is optional, only makes sense if you are using the C<GI> to C<Taxid> dictionary and is I<off> by default.
66              
67             =back
68              
69             =head1 METHODS
70              
71             This module inherits from L<Bio::LITE::Taxonomy> so all the methods explained there are accessible.
72             These methods are also available:
73              
74             =over 4
75              
76             =item get_taxonomy_from_gi
77              
78             Accepts a C<GI> as input and returns an array with its ascendants ordered from top to bottom.
79              
80             my @tax = $tax->get_taxonomy_from_gi($gi);
81             print "$_\n" for (@tax);
82              
83             If called in scalar context, returns an array reference instead of the array.
84             See L<Bio::LITE::Taxonomy>::get_taxonomy
85              
86             =item get_taxonomy_with_levels_from_gi
87              
88             The same as get_taxonomy_from_gi but instead of getting the ascendants returns an array of array references. Each array reference has the ascendant and its taxonomic level (at positions 0 and 1 respectively). This is simpler than it sounds. Check L<Bio::LITE::Taxonomy>::get_taxonomy_with_levels for more information.
89              
90             If called in scalar context, returns an array reference instead of the array.
91              
92             =item get_term_at_level_from_gi
93              
94             Given a gi and a taxonomic level as input, returns the taxon. For example,
95              
96             my $taxon = $tax->get_term_at_level_from_gi($gi,"family");
97              
98             See L<Bio::LITE::Taxonomy>::get_term_at_level.
99              
100             =back
101              
102             =head1 SEE ALSO
103              
104             L<Bio::LITE::Taxonomy::NCBI::Gi2taxid> - Module to convert NCBI GIs to Taxids
105              
106             L
107              
108             L
109              
110             L - Bioperl alternative for NCBI taxonomies.
111              
112              
113             =head1 AUTHOR
114              
115             Miguel Pignatelli
116              
117             Any comments or suggestions should be addressed to emepyc@gmail.com
118              
119             =head1 LICENSE
120              
121             Copyright 2009 Miguel Pignatelli, all rights reserved.
122              
123             This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself.
124              
125             =cut
126              
127 2     2   58528 use strict;
  2         6  
  2         83  
128 2     2   10 use warnings;
  2         5  
  2         66  
129 2     2   11 use Carp qw/croak/;
  2         8  
  2         145  
130              
131             #use Bio::LITE::Taxonomy;
132             #if (do {(sprintf "%vd",$^V) =~ /5\.(\d\d)/; $1} >= 10}) {
133             # import base qw(Taxonomy);
134             #} else {
135             # import parent qw(Taxonomy);
136             #}
137 2     2   13 use base qw(Bio::LITE::Taxonomy);
  2         3  
  2         1103  
138              
139             our $VERSION = 0.09;
140              
141 2     2   912 use constant FS => '\t\|\t';
  2         4  
  2         153  
142 2     2   12 use constant RS => '\t\|\n';
  2         9  
  2         2302  
143              
144             sub new {
145 1     1 1 1122 my ($class,%args) = @_;
146 1         2 my %opts;
147              
148 1 50       5 defined $args{'nodes'} or croak "Need the file nodes.dmp";
149 1 50       5 defined $args{'names'} or croak "Need the file names.dmp";
150              
151 1         4 @opts{qw/nodesFile namesFile/} = @args{qw/nodes names/};
152              
153 1         7 my $self = bless \%opts, $class;
154 1         4 $self->_build_taxonomy();
155 1 50       6 if (defined $args{dict}) {
156 0         0 require Bio::LITE::Taxonomy::NCBI::Gi2taxid;
157 0   0     0 $self->{dict} = Bio::LITE::Taxonomy::NCBI::Gi2taxid->new(dict=>$args{dict},save_mem=>$args{save_mem} || 0);
158             }
159 1         14 return $self;
160             }
161              
162             sub _build_taxonomy {
163 1     1   2 my ($self) = @_;
164 1         8 my $nodesFile = $self->{nodesFile};
165 1         2 my $tax;
166 1 50 33     14 if ((UNIVERSAL::isa($nodesFile, 'GLOB')) or (ref \$nodesFile eq 'GLOB')) {
167 0         0 $tax = $nodesFile;
168             } else {
169 1 50       48 open $tax, "<", $nodesFile or croak "$!";
170             }
171 1         185341 while (<$tax>){
172 560790         1074233 chomp;
173 560790         1242098 $self -> _create_node(_parse_tax_rec($_));
174             }
175 1         7 $self -> _name_nodes();
176 1 50 33     37 close $tax unless ((UNIVERSAL::isa($nodesFile, 'GLOB')) or (ref \$nodesFile eq 'GLOB'));
177             }
178              
179             sub _create_node {
180 560790     560790   1741602 my ($self,$node,$parent,$level) = @_;
181 560790 100       2118505 $self->{allowed_levels}{$level} = 1 if (! defined $self->{allowed_levels}{$level});
182 560790         915044 @{$self->{nodes}->{$node}}{qw/parent level/} = ($parent,$level);
  560790         15342675  
183             }
184              
185             sub _name_nodes {
186 1     1   2 my ($self) = @_;
187 1         4 my $namesFile = $self->{namesFile};
188 1         2 my $nodesNames;
189 1 50 33     25 if ((UNIVERSAL::isa($namesFile, 'GLOB')) or (ref \$namesFile eq 'GLOB')) {
190 0         0 $nodesNames = $namesFile;
191             } else {
192 1 50       97 open $nodesNames, "<", $namesFile or croak $!;
193             }
194 1         1225 while (<$nodesNames>){
195 806059         1498144 chomp;
196 806059         1648567 my ($taxId,$taxName,$comment) = _process_tax_name ($_);
197 806059 100       15607913 if ($comment eq "scientific name"){
    100          
198 560790         779943 ${$self->{nodes}->{$taxId}}{name} = $taxName;
  560790         3379501  
199 560790         59234643 $self->{names}->{$taxName} = $taxId;
200             } elsif ($comment eq "synonym") {
201 138436         4580984 $self->{names}->{$taxName} = $taxId;
202             }
203             }
204 1         28 close $nodesNames;
205             }
206              
207             sub _parse_tax_rec {
208 560790     560790   1321249 my $line = shift @_;
209 560790         5700570 return (split FS,$line)[0,1,2];
210             }
211              
212              
213             sub _process_tax_name {
214 806059     806059   1736256 my $line = shift @_;
215 806059         102014963 my @fields = split FS, $line;
216 806059         3233950 $fields[3] =~ s/\t\|$//;
217 806059         4201086 return ($fields[0],$fields[1],$fields[3]);
218             }
219              
220             sub get_taxonomy_from_gi {
221 0     0 1   my ($self,$gi) = @_;
222 0 0         croak "Undefined GI\n" unless (defined $gi);
223 0           my $taxid = $self->{dict}->get_taxid($gi);
224 0           return $self->get_taxonomy($taxid);
225             }
226              
227             sub get_taxonomy_with_levels_from_gi {
228 0     0 1   my ($self,$gi) = @_;
229 0 0         croak "Undefined GI\n" unless (defined $gi);
230 0           my $taxid = $self->{dict}->get_taxid($gi);
231 0           return $self->get_taxonomy_with_levels($taxid);
232             }
233              
234             sub get_term_at_level_from_gi {
235 0     0 1   my ($self,$gi,$level) = @_;
236 0 0         croak "Undefined GI\n" unless (defined $gi);
237 0 0         croak "Undefined Level\n" unless (defined $level);
238 0           my $taxid = $self->{dict}->get_taxid($gi);
239 0           return $self->get_term_at_level($taxid,$level);
240             }
241              
242             # Note: Use methods in Gi2taxid as if they were from here
243             sub AUTOLOAD {
244 0     0     my ($self,$args) = @_;
245 0           our $AUTOLOAD;
246 0           my $method = $AUTOLOAD;
247 0           $method =~ s/.*:://;
248 0 0         $self->{dict}->can($method) or croak "$method not defined in package __PACKAGE__\n";
249 0           return $self->{dict}->$method($args);
250             }
251              
252             # Needed so that AUTOLOAD is not called on object destruction
253 0     0     sub DESTROY { }
254              
255             1;