File Coverage

Bio/DB/Taxonomy.pm
Criterion Covered Total %
statement 61 84 72.6
branch 13 22 59.0
condition 16 24 66.6
subroutine 9 15 60.0
pod 8 8 100.0
total 107 153 69.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::DB::Taxonomy
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::DB::Taxonomy - Access to a taxonomy database
17              
18             =head1 SYNOPSIS
19              
20             use Bio::DB::Taxonomy;
21             my $db = Bio::DB::Taxonomy->new(-source => 'entrez');
22             # use NCBI Entrez over HTTP
23             my $taxonid = $db->get_taxonid('Homo sapiens');
24              
25             # get a taxon
26             my $taxon = $db->get_taxon(-taxonid => $taxonid);
27              
28             =head1 DESCRIPTION
29              
30             This is a front end module for access to a taxonomy database.
31              
32             =head1 FEEDBACK
33              
34             =head2 Mailing Lists
35              
36             User feedback is an integral part of the evolution of this and other
37             Bioperl modules. Send your comments and suggestions preferably to
38             the Bioperl mailing list. Your participation is much appreciated.
39              
40             bioperl-l@bioperl.org - General discussion
41             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42              
43             =head2 Support
44              
45             Please direct usage questions or support issues to the mailing list:
46              
47             I
48              
49             rather than to the module maintainer directly. Many experienced and
50             reponsive experts will be able look at the problem and quickly
51             address it. Please include a thorough description of the problem
52             with code and data examples if at all possible.
53              
54             =head2 Reporting Bugs
55              
56             Report bugs to the Bioperl bug tracking system to help us keep track
57             of the bugs and their resolution. Bug reports can be submitted via
58             the web:
59              
60             https://github.com/bioperl/bioperl-live/issues
61              
62             =head1 AUTHOR - Jason Stajich
63              
64             Email jason-at-bioperl.org
65              
66             =head1 CONTRIBUTORS
67              
68             Sendu Bala: bix@sendu.me.uk
69              
70             =head1 APPENDIX
71              
72             The rest of the documentation details each of the object methods.
73             Internal methods are usually preceded with a _
74              
75             =cut
76              
77             # Let the code begin...
78              
79             package Bio::DB::Taxonomy;
80 44     44   1174 use vars qw($DefaultSource $TAXON_IIDS);
  44         109  
  44         3314  
81 44     44   214 use strict;
  44         76  
  44         1637  
82 44     44   12293 use Bio::Tree::Tree;
  44         160  
  44         1465  
83              
84 44     44   324 use base qw(Bio::Root::Root);
  44         75  
  44         36608  
85              
86             $DefaultSource = 'entrez';
87             $TAXON_IIDS = {};
88              
89              
90             =head2 new
91              
92             Title : new
93             Usage : my $obj = Bio::DB::Taxonomy->new(-source => 'entrez');
94             Function: Builds a new Bio::DB::Taxonomy object.
95             Returns : an instance of Bio::DB::Taxonomy
96             Args : -source => which database source 'entrez' (NCBI taxonomy online),
97             'flatfile' (local NCBI taxonomy), 'greengenes' (local
98             GreenGenes taxonomy), 'silva' (local Silva taxonomy), or
99             'list' (Do-It-Yourself taxonomy)
100              
101             =cut
102              
103             sub new {
104 514     514 1 1631 my($class,@args) = @_;
105              
106 514 100       2250 if( $class =~ /Bio::DB::Taxonomy::(\S+)/ ) {
107 258         1081 my ($self) = $class->SUPER::new(@args);
108 258         1011 $self->_initialize(@args);
109 258         731 return $self;
110             } else {
111 256         1094 my %param = @args;
112 256         1114 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  510         1582  
113 256   33     1116 my $source = $param{'-source'} || $DefaultSource;
114              
115 256         506 $source = "\L$source"; # normalize capitalization to lower case
116              
117             # normalize capitalization
118 256 50       1045 return unless( $class->_load_tax_module($source) );
119 256         2187 return "Bio::DB::Taxonomy::$source"->new(@args);
120             }
121             }
122              
123              
124             # empty for now
125       258     sub _initialize { }
126              
127              
128             =head2 get_num_taxa
129              
130             Title : get_num_taxa
131             Usage : my $num = $db->get_num_taxa();
132             Function: Get the number of taxa stored in the database.
133             Returns : A number
134             Args : None
135              
136             =cut
137              
138             sub get_num_taxa {
139 0     0 1 0 shift->throw_not_implemented();
140             }
141              
142              
143             =head2 get_taxon
144              
145             Title : get_taxon
146             Usage : my $taxon = $db->get_taxon(-taxonid => $taxonid);
147             Function: Get a Bio::Taxon object from the database.
148             Returns : Bio::Taxon object
149             Args : just a single value which is the database id, OR named args:
150             -taxonid => taxonomy id (to query by taxonid)
151             OR
152             -name => string (to query by a taxonomy name: common name,
153             scientific name, etc)
154              
155             =cut
156              
157             sub get_taxon {
158 0     0 1 0 shift->throw_not_implemented();
159             }
160              
161             *get_Taxonomy_Node = \&get_taxon;
162              
163              
164             =head2 get_taxonids
165              
166             Title : get_taxonids
167             Usage : my @taxonids = $db->get_taxonids('Homo sapiens');
168             Function: Searches for a taxonid (typically ncbi_taxon_id) based on a query
169             string. Note that multiple taxonids can match to the same supplied
170             name.
171             Returns : array of integer ids in list context, one of these in scalar context
172             Args : string representing the taxon's name
173              
174             =cut
175              
176             sub get_taxonids {
177 0     0 1 0 shift->throw_not_implemented();
178             }
179              
180             *get_taxonid = \&get_taxonids;
181             *get_taxaid = \&get_taxonids;
182              
183              
184             =head2 get_tree
185              
186             Title : get_tree
187             Usage : my $tree = $db->get_tree(@species_names);
188             Function: Generate a tree comprised of the full lineages of all the supplied
189             species names. The nodes for the requested species are given
190             name('supplied') values corresponding to the supplied name, such that
191             they can be identified if the real species name in the database
192             (stored under node_name()) is different. The nodes are also given an
193             arbitrary branch length of 1.
194             Returns : Bio::Tree::Tree
195             Args : A list of species names (strings) to include in the tree.
196              
197             =cut
198              
199             sub get_tree {
200 0     0 1 0 my ($self, @species_names) = @_;
201            
202             # the full lineages of the species are merged into a single tree
203 0         0 my $tree;
204 0         0 for my $name (@species_names) {
205 0         0 my @ids = $self->get_taxonids($name);
206 0 0       0 if (not scalar @ids) {
207 0         0 $self->throw("Could not find species $name in the taxonomy");
208             }
209 0         0 for my $id (@ids) {
210 0         0 my $node = $self->get_taxon(-taxonid => $id);
211 0         0 $node->name('supplied', $name);
212 0 0       0 if ($tree) {
213 0         0 $tree->merge_lineage($node);
214             } else {
215 0         0 $tree = Bio::Tree::Tree->new(-verbose => $self->verbose, -node => $node);
216             }
217             }
218             }
219              
220             # add arbitrary branch length
221 0         0 for my $node ($tree->get_nodes) {
222 0         0 $node->branch_length(1);
223             }
224            
225 0         0 return $tree;
226             }
227              
228              
229             =head2 ancestor
230              
231             Title : ancestor
232             Usage : my $ancestor_taxon = $db->ancestor($taxon);
233             Function: Retrieve the full ancestor taxon of a supplied Taxon from the
234             database.
235             Returns : Bio::Taxon
236             Args : Bio::Taxon (that was retrieved from this database)
237              
238             =cut
239              
240             sub ancestor {
241 0     0 1 0 shift->throw_not_implemented();
242             }
243              
244              
245             =head2 each_Descendent
246              
247             Title : each_Descendent
248             Usage : my @taxa = $db->each_Descendent($taxon);
249             Function: Get all the descendents of the supplied Taxon (but not their
250             descendents, ie. not a recursive fetchall).
251             Returns : Array of Bio::Taxon objects
252             Args : Bio::Taxon (that was retrieved from this database)
253              
254             =cut
255              
256             sub each_Descendent {
257 0     0 1 0 shift->throw_not_implemented();
258             }
259              
260              
261             =head2 get_all_Descendents
262              
263             Title : get_all_Descendents
264             Usage : my @taxa = $db->get_all_Descendents($taxon);
265             Function: Like each_Descendent(), but do a recursive fetchall
266             Returns : Array of Bio::Taxon objects
267             Args : Bio::Taxon (that was retrieved from this database)
268              
269             =cut
270              
271             sub get_all_Descendents {
272 14     14 1 24 my ($self, $taxon) = @_;
273 14         15 my @taxa;
274 14         30 foreach my $desc_taxon ($self->each_Descendent($taxon)) {
275 12         41 push @taxa, ($desc_taxon, $self->get_all_Descendents($desc_taxon));
276             }
277 14         35 return @taxa;
278             }
279              
280              
281             =head2 _load_tax_module
282              
283             Title : _load_tax_module
284             Usage : *INTERNAL Bio::DB::Taxonomy stuff*
285             Function: Loads up (like use) a module at run time on demand
286              
287             =cut
288              
289             sub _load_tax_module {
290 256     256   672 my ($self, $source) = @_;
291 256         694 my $module = "Bio::DB::Taxonomy::" . $source;
292 256         379 my $ok;
293              
294 256         571 eval { $ok = $self->_load_module($module) };
  256         1091  
295 256 50       969 if ( $@ ) {
296 0         0 print STDERR $@;
297 0         0 print STDERR <
298             $self: $source cannot be found
299             Exception $@
300             For more information about the Bio::DB::Taxonomy system please see
301             the Bio::DB::Taxonomy docs. This includes ways of checking for
302             formats at compile time, not run time.
303             END
304             ;
305             }
306 256         896 return $ok;
307             }
308              
309              
310             =head2 _handle_internal_id
311              
312             Title : _handle_internal_id
313             Usage : *INTERNAL Bio::DB::Taxonomy stuff*
314             Function: Add an internal ID to a taxon object, ensuring that the taxon gets
315             the same internal ID, regardless of which database it is retrieved
316             from.
317             Returns : The assigned internal ID
318             Args : * A Bio::Taxon
319             * An optional boolean to decide whether or not to try and do the job
320             using scientific name & rank in addition to taxon ID. This is
321             useful if your IDs are not comparable to that of other databases,
322             e.g. if they are arbitrary, as in the case of Bio::DB::Taxonomy::list.
323             CAVEAT: will handle ambiguous names within a database fine, but not
324             across multiple databases.
325              
326             =cut
327              
328             sub _handle_internal_id {
329 6910     6910   10408 my ($self, $taxon, $try_name) = @_;
330 6910 50 33     28257 $self->throw("Must supply a Bio::Taxon") unless ref($taxon) && $taxon->isa('Bio::Taxon');
331              
332 6910   50     12241 my $taxid = $taxon->id || return;
333 6910   100     13086 my $name = $taxon->scientific_name || '';
334 6910   100     12681 my $rank = $taxon->rank || 'no rank';
335 6910 50       15886 my $dbh = $try_name ? $taxon->db_handle : 'any';
336              
337 6910         16218 my $iid = $TAXON_IIDS->{taxids}->{$dbh}->{$taxid};
338 6910 100 66     22503 if ( (not defined $iid) && $try_name && $name && exists $TAXON_IIDS->{names}->{$name}) {
      100        
      100        
339             # Search for a suitable IID based on species name and ranks
340 1542         2776 my %test_ranks = map {$_ => undef} ($rank, 'no rank');
  3084         6939  
341 1542         4997 SEARCH: while (my ($test_rank, undef) = each %test_ranks) {
342             # Search at the specified rank first, then with 'no rank'
343 1542         1950 while ( my ($test_iid, $test_info) = each %{$TAXON_IIDS->{names}->{$name}->{$rank}} ) {
  2061         9691  
344 1548         5406 while (my ($test_db, $test_taxid) = each %$test_info) {
345 1029 50 33     3280 if ( ($test_db eq $dbh) && not($test_taxid eq $taxid) ) {
346             # Taxa are different (same database, different taxid)
347 0         0 next;
348             }
349             # IID is acceptable since taxa are from different databases,
350             # or from the same database but have the same taxid
351 1029         1278 $iid = $test_iid;
352 1029         2520 $TAXON_IIDS->{taxids}->{$dbh}->{$taxid} = $iid;
353 1029         3209 last SEARCH;
354             }
355             }
356             }
357             }
358              
359 6910 100       10259 if (defined $iid) {
360             # Assign Bio::DB::Taxonomy IID with risky Bio::Tree::Node internal method
361 5548         11022 $taxon->_creation_id($iid);
362             } else {
363             # Register new IID in Bio::DB::Taxonomy
364 1362         2773 $iid = $taxon->internal_id;
365 1362         3814 $TAXON_IIDS->{taxids}->{$dbh}->{$taxid} = $iid;
366 1362 100       2381 if ($name) {
367 1359         5224 $TAXON_IIDS->{names}->{$name}->{$rank}->{$iid}->{$taxon->db_handle} = $taxid
368             }
369             }
370              
371 6910         13696 return $iid;
372              
373             }
374              
375              
376             1;