| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | #!/usr/bin/perl | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | package Bio::LITE::Taxonomy; | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 NAME | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | Bio::LITE::Taxonomy - Lightweight and efficient taxonomic tree manager | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | use Bio::LITE::Taxonomy | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | my $taxNCBI = Bio::LITE::Taxonomy::NCBI->new ( | 
| 14 |  |  |  |  |  |  | names=> "/path/to/names.dmp", | 
| 15 |  |  |  |  |  |  | nodes=>"/path/to/nodes.dmp" | 
| 16 |  |  |  |  |  |  | ); | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | my @taxNCBI = $taxNCBI->get_taxonomy(1442); | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | my $taxRDP = Bio::LITE::Taxonomy::RDP->new ( | 
| 21 |  |  |  |  |  |  | bergeyXML=>"/media/disk-1/bergeyTrainingTree.xml" | 
| 22 |  |  |  |  |  |  | ) | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | my @taxRDP = $taxRDP->get_taxonomy(22075); | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | This module provides easy and efficient access to different taxonomies (NCBI and RDP) with minimal dependencies and without intermediate databases. This module should be used through specific taxonomic interfaces (e.g. L or L). | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | This module is not part of the Bioperl bundle. For Bioperl alternatives, see the L"SEE ALSO"> section of this document. If you are dealing with big datasets or you don't need the rest of the Bioperl bundle to process taxonomic queries this module is for you. | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | These modules are designed with performance in mind. The trees are stored in memory (as plain hashes). The GI to Taxid mappings provided by L are very efficient. It also supports both NCBI and RDP taxonomies following the same interface. | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | =head1 METHODS | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | The following methods are available: | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | =over 4 | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | =item get_taxonomy | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | Accepts a taxid as input and returns an array with its ascendants ordered from top to bottom. | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | my @tax = $tax->get_taxonomy($taxid); | 
| 46 |  |  |  |  |  |  | print "$_\n" for (@tax); | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | If called in scalar context, returns an array reference. | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | =item get_taxonomy_with_levels | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | The same as get_taxonomy but instead of getting the ascendants returns an array of array references. Each array reference has the ascendant and its taxonomic level (at positions 0 and 1 respectively). This is simpler than it sounds. Check this: | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | my @taxL = $tax->get_taxonomy_with_levels($taxid); | 
| 55 |  |  |  |  |  |  | for my $l (@taxL) { | 
| 56 |  |  |  |  |  |  | print "Taxon $l->[0] has rank $l->[1]\n"; | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | If called in scalar context, returns an array reference. | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | =item get_taxid_from_name | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | Accepts the scientific name of a taxon and returns its associated taxid. | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | =item get_taxonomy_from_name | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | Same as before but returns the full taxonomy of the scientific name. This is the same as: | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | my $taxid = $tax->get_taxid_from_name($name); | 
| 70 |  |  |  |  |  |  | my @taxonomy = $tax->get_taxonomy($taxid); | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | If called in scalar context returns an array reference. | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | =item get_term_at_level | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | Given a taxid and a taxonomic level as input, returns the taxon. For example, | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | my $taxon = $tax->get_term_at_level(1442,"family"); # $taxon = Bacillaceae | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | =item get_level_from_name | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | Given a taxon's scientific name, returns its associated taxonomic level. | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | =back | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | L | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | L | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | L: Module to obtain NCBIs Taxids from GIs. | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | L: Bioperl alternative to handle taxonomies. | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | L: Bioperl module to handle nodes in taxonomies | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | =head1 AUTHOR | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | Miguel Pignatelli | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | Any comments or suggestions should be addressed to emepyc@gmail.com | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =head1 LICENSE | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | Copyright 2009 Miguel Pignatelli, all rights reserved. | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself. | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | =cut | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 3 |  |  | 3 |  | 67811 | use strict; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 119 |  | 
| 113 | 3 |  |  | 3 |  | 17 | use warnings; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 97 |  | 
| 114 | 3 |  |  | 3 |  | 17 | use Carp qw/croak/; | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 228 |  | 
| 115 |  |  |  |  |  |  |  | 
| 116 | 3 |  |  | 3 |  | 17 | use vars qw/$VERSION @ISA/; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 2195 |  | 
| 117 |  |  |  |  |  |  | $VERSION = '0.07'; | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | sub _check_level | 
| 120 |  |  |  |  |  |  | { | 
| 121 | 0 |  |  | 0 |  |  | my ($self, $level) = @_; | 
| 122 | 0 | 0 |  |  |  |  | croak "Level not defined" unless defined $level;; | 
| 123 | 0 |  |  |  |  |  | return $self->{allowed_levels}{$level}; | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | sub _print_levels | 
| 127 |  |  |  |  |  |  | { | 
| 128 | 0 |  |  | 0 |  |  | my ($self) = @_; | 
| 129 | 0 |  |  |  |  |  | print STDERR "$_\n" for sort keys %{$self->{allowed_levels}}; | 
|  | 0 |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | sub get_term_at_level | 
| 133 |  |  |  |  |  |  | { | 
| 134 | 0 |  |  | 0 | 1 |  | my ($self,$taxid,$level) = @_; | 
| 135 | 0 | 0 |  |  |  |  | do { | 
| 136 | 0 |  |  |  |  |  | print STDERR "Level $level not recognized\nAllowed levels:\n"; | 
| 137 | 0 |  |  |  |  |  | $self->_print_levels; | 
| 138 | 0 |  |  |  |  |  | croak; | 
| 139 |  |  |  |  |  |  | } if (! defined $self->_check_level($level)); | 
| 140 | 0 | 0 |  |  |  |  | return "" unless (defined ${$self->{nodes}->{$taxid}}{name}); | 
|  | 0 |  |  |  |  |  |  | 
| 141 | 0 |  |  |  |  |  | while (${$self->{nodes}->{$taxid}}{name} ne "root"){ | 
|  | 0 |  |  |  |  |  |  | 
| 142 | 0 | 0 |  |  |  |  | return ${$self->{nodes}->{$taxid}}{name} if (${$self->{nodes}->{$taxid}}{level} eq $level); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 143 | 0 |  |  |  |  |  | $taxid = ${$self->{nodes}->{$taxid}}{parent}; | 
|  | 0 |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | } | 
| 145 | 0 |  |  |  |  |  | return "undef"; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | sub get_taxonomy | 
| 149 |  |  |  |  |  |  | { | 
| 150 | 0 |  |  | 0 | 1 |  | my ($self, $taxid) = @_; | 
| 151 | 0 | 0 |  |  |  |  | return undef unless defined $taxid; | 
| 152 | 0 | 0 |  |  |  |  | return "" unless defined ${$self->{nodes}->{$taxid}}{name}; | 
|  | 0 |  |  |  |  |  |  | 
| 153 | 0 |  |  |  |  |  | my @taxonomy; | 
| 154 | 0 |  |  |  |  |  | while (${$self->{nodes}->{$taxid}}{name} ne "root"){ | 
|  | 0 |  |  |  |  |  |  | 
| 155 | 0 |  |  |  |  |  | push @taxonomy, ${$self->{nodes}->{$taxid}}{name}; | 
|  | 0 |  |  |  |  |  |  | 
| 156 | 0 |  |  |  |  |  | $taxid = ${$self->{nodes}->{$taxid}}{parent}; | 
|  | 0 |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  | #      pop @taxonomy;  # Causes lost of first non-root annotation | 
| 159 |  |  |  |  |  |  | return wantarray | 
| 160 | 0 | 0 |  |  |  |  | ? reverse @taxonomy | 
| 161 |  |  |  |  |  |  | : [reverse @taxonomy]; | 
| 162 |  |  |  |  |  |  | #      return reverse do{pop @taxonomy;@taxonomy}; | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | # Note: | 
| 166 |  |  |  |  |  |  | # This may change in the future. | 
| 167 |  |  |  |  |  |  | # It would be simpler to return a hash reference, but (specially in the NCBI taxonomy) | 
| 168 |  |  |  |  |  |  | # many taxons has not associated level (i.e. C) which would imply collapsing the keys. | 
| 169 |  |  |  |  |  |  | # And of course, use an ordered hash or keep an extra array with the ordering. | 
| 170 |  |  |  |  |  |  | # The code for doing this is commented below. | 
| 171 |  |  |  |  |  |  | # Look for user feedback about this. | 
| 172 |  |  |  |  |  |  | sub get_taxonomy_with_levels | 
| 173 |  |  |  |  |  |  | { | 
| 174 | 0 |  |  | 0 | 1 |  | my ($self,$taxid) = @_; | 
| 175 | 0 | 0 |  |  |  |  | return undef unless defined $taxid; | 
| 176 | 0 | 0 |  |  |  |  | return "" unless defined ${$self->{nodes}->{$taxid}}{name}; | 
|  | 0 |  |  |  |  |  |  | 
| 177 | 0 |  |  |  |  |  | my @taxonomy; | 
| 178 | 0 |  |  |  |  |  | while (${$self->{nodes}->{$taxid}}{name} ne "root"){ | 
|  | 0 |  |  |  |  |  |  | 
| 179 | 0 |  |  |  |  |  | push @taxonomy, [${$self->{nodes}->{$taxid}}{name},${$self->{nodes}->{$taxid}}{level}]; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 180 | 0 |  |  |  |  |  | $taxid = ${$self->{nodes}->{$taxid}}{parent}; | 
|  | 0 |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  | #      pop @taxonomy; # Last element is cellular_organism... always? | 
| 183 |  |  |  |  |  |  | return wantarray | 
| 184 | 0 | 0 |  |  |  |  | ? reverse @taxonomy | 
| 185 |  |  |  |  |  |  | :  [reverse @taxonomy]; | 
| 186 |  |  |  |  |  |  | #      return reverse do{pop @taxonomy;@taxonomy}; | 
| 187 |  |  |  |  |  |  | } | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | sub get_level_from_name | 
| 190 |  |  |  |  |  |  | { | 
| 191 | 0 |  |  | 0 | 1 |  | my ($self,$name) = @_; | 
| 192 | 0 | 0 |  |  |  |  | return defined $self->{names}{$name} ? $self->{nodes}->{$self->{names}{$name}}->{level} : undef; | 
| 193 |  |  |  |  |  |  | } | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | sub get_taxid_from_name | 
| 196 |  |  |  |  |  |  | { | 
| 197 | 0 |  |  | 0 | 1 |  | my ($self,$name) = @_; | 
| 198 | 0 | 0 |  |  |  |  | return defined $self->{names}{$name} ? $self->{names}{$name} : undef; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 |  |  |  |  |  |  | sub get_taxonomy_from_name | 
| 202 |  |  |  |  |  |  | { | 
| 203 | 0 |  |  | 0 | 1 |  | my ($self,$name) = @_; | 
| 204 | 0 |  |  |  |  |  | return $self->get_taxonomy($self->{names}{$name}); | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | # Currently not in use. May apply in the future | 
| 208 |  |  |  |  |  |  | # sub _get_taxonomy | 
| 209 |  |  |  |  |  |  | #       { | 
| 210 |  |  |  |  |  |  | #         my ($self,$taxid) = @_; | 
| 211 |  |  |  |  |  |  | #         return undef unless (defined $taxid); | 
| 212 |  |  |  |  |  |  | #         return "" unless defined ${$self->{nodes}->{$taxid}}{name}; | 
| 213 |  |  |  |  |  |  | #         my %taxonomy; | 
| 214 |  |  |  |  |  |  | #         my @order; | 
| 215 |  |  |  |  |  |  | #         while (${$self->{nodes}->{$taxid}}{name} ne "root"){ | 
| 216 |  |  |  |  |  |  | #           push @{$taxonomy{${$self->{nodes}->{$taxid}}{level}}}, ${$self->{nodes}->{$taxid}}{name}; | 
| 217 |  |  |  |  |  |  | #           push @order, ${$self->{nodes}->{$taxid}}{level}; | 
| 218 |  |  |  |  |  |  | #           $taxid = ${$self->{nodes}->{$taxid}}{parent}; | 
| 219 |  |  |  |  |  |  | #         } | 
| 220 |  |  |  |  |  |  | #         return (\%taxonomy,\@order); | 
| 221 |  |  |  |  |  |  | #       } | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  | # Currently not in use. May apply in the future | 
| 224 |  |  |  |  |  |  | # sub get_taxonomy1 | 
| 225 |  |  |  |  |  |  | #         { | 
| 226 |  |  |  |  |  |  | #           my ($self,$taxid) = @_; | 
| 227 |  |  |  |  |  |  | #           my ($t,$o) = $self->_get_taxonomy($taxid); | 
| 228 |  |  |  |  |  |  | #           my @taxonomy; | 
| 229 |  |  |  |  |  |  | #           for my $l (@$o) { | 
| 230 |  |  |  |  |  |  | #             push @taxonomy,shift @{$t->{$l}}; | 
| 231 |  |  |  |  |  |  | #           } | 
| 232 |  |  |  |  |  |  | #           return reverse do{pop @taxonomy; @taxonomy}; | 
| 233 |  |  |  |  |  |  | #         } | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | # Currently not in use. May apply in the future | 
| 236 |  |  |  |  |  |  | # sub get_taxonomy_with_levels1 | 
| 237 |  |  |  |  |  |  | #           { | 
| 238 |  |  |  |  |  |  | #             my ($self,$taxid) = @_; | 
| 239 |  |  |  |  |  |  | #             my ($t,$o) = $self->_get_taxonomy($taxid); | 
| 240 |  |  |  |  |  |  | #             my @taxonomy; | 
| 241 |  |  |  |  |  |  | #             for my $l (@$o) { | 
| 242 |  |  |  |  |  |  | #               push @taxonomy, ($l.":".shift @{$t->{$l}}); | 
| 243 |  |  |  |  |  |  | #             } | 
| 244 |  |  |  |  |  |  | #             return join "\t",@taxonomy; | 
| 245 |  |  |  |  |  |  | #           } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | 1; |