|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::LITE::Taxonomy::NCBI;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::LITE::Taxonomy::NCBI - Lightweight and efficient NCBI taxonomic manager  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  use Bio::LITE::Taxonomy::NCBI;  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $taxDB = Bio::LITE::Taxonomy::NCBI->new (  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                              db=>"NCBI",  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                              names=> "/path/to/names.dmp",  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                              nodes=>"/path/to/nodes.dmp"  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                             );  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $tax = $taxDB->get_taxonomy(1442); # 1442 is a Taxid  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $taxid = $taxDB->get_taxid_from_name("Bacteroidetes");  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $term = $taxDB->get_term_at_level(1442,"family");  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $taxDB2 = Bio::LITE::Taxonomy::NCBI-> new (  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                db=>"NCBI",  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                names=> "/path/to/names.dmp",  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                nodes=>"/path/to/nodes.dmp",  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                dict=>"/path/to/dictionary/file",  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                               );  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $tax2 = $taxDB2->get_taxonomy_from_gi(12553);  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  # Methods from Bio::LITE::Taxonomy::NCBI::Gi2taxid  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  # can also be called directly:  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $taxid2 = $taxDB2->get_taxid(12553);  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module provides easy and efficient access to the NCBI taxonomy with minimal dependencies and without intermediary databases.  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module is not part of the Bioperl bundle. For bioperl alternatives see the L"SEE ALSO"> section of this document.  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 CONSTRUCTOR  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 C  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Creates a Bio::LITE::Taxonomy::NCBI object.  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The following parameters are needed  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item names  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The location of the I file. Filehandles are also allowed. I.  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item nodes  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The location of the I file. Filehandles are also allowed. I.  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item dict  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can query the tree using Cs directly instead of Cs. For doing this, you should provide the NCBIs GI to Taxid mapper in binary format as explained in L. I  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item save_mem  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Use this option to avoid to load the binary dictionary (GI to Taxid) into memory. This will save almost 1GB of system memory but looking up for Taxids will be ~20% slower. This parameter is optional, only makes sense if you are using the C to C dictionary and is I by default.  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module inherits from L so all the methods explained there are accessible.  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These methods are also available:  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item get_taxonomy_from_gi  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Accepts a C as input and returns an array with its ascendants ordered from top to bottom.  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @tax = $tax->get_taxonomy_from_gi($gi);  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "$_\n" for (@tax);  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If called in scalar context, returns an array reference instead of the array.  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See L::get_taxonomy  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item get_taxonomy_with_levels_from_gi  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The same as get_taxonomy_from_gi but instead of getting the ascendants returns an array of array references. Each array reference has the ascendant and its taxonomic level (at positions 0 and 1 respectively). This is simpler than it sounds. Check L::get_taxonomy_with_levels for more information.  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If called in scalar context, returns an array reference instead of the array.  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item get_term_at_level_from_gi  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Given a gi and a taxonomic level as input, returns the taxon. For example,  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $taxon = $tax->get_term_at_level_from_gi($gi,"family");  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See L::get_term_at_level.  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L - Module to convert NCBIs GIs to Taxids  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L - Bioperl alternative for NCBI taxonomies.  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Miguel Pignatelli  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Any comments or suggestions should be addressed to emepyc@gmail.com  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LICENSE  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright 2009 Miguel Pignatelli, all rights reserved.  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself.  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
58528
 | 
 use strict;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
    | 
| 
128
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
10
 | 
 use warnings;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
    | 
| 
129
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
11
 | 
 use Carp qw/croak/;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
145
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #use Bio::LITE::Taxonomy;  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #if (do {(sprintf "%vd",$^V) =~ /5\.(\d\d)/; $1}  >= 10}) {  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #    import base qw(Taxonomy);  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #} else {  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #	 import parent qw(Taxonomy);  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #}  | 
| 
137
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
13
 | 
 use base qw(Bio::LITE::Taxonomy);  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1103
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = 0.09;  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
912
 | 
 use constant FS => '\t\|\t';  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
153
 | 
    | 
| 
142
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
12
 | 
 use constant RS => '\t\|\n';  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2302
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
145
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1122
 | 
   my ($class,%args) = @_;  | 
| 
146
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my %opts;  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   defined $args{'nodes'} or croak "Need the file nodes.dmp";  | 
| 
149
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   defined $args{'names'} or croak "Need the file names.dmp";  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   @opts{qw/nodesFile namesFile/} = @args{qw/nodes names/};  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   my $self = bless \%opts, $class;  | 
| 
154
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   $self->_build_taxonomy();  | 
| 
155
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   if (defined $args{dict}) {  | 
| 
156
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     require Bio::LITE::Taxonomy::NCBI::Gi2taxid;  | 
| 
157
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     $self->{dict} = Bio::LITE::Taxonomy::NCBI::Gi2taxid->new(dict=>$args{dict},save_mem=>$args{save_mem} || 0);  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
159
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
   return $self;  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _build_taxonomy {  | 
| 
163
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
2
 | 
   my ($self) = @_;  | 
| 
164
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   my $nodesFile = $self->{nodesFile};  | 
| 
165
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my $tax;  | 
| 
166
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
14
 | 
   if ((UNIVERSAL::isa($nodesFile, 'GLOB')) or (ref \$nodesFile eq 'GLOB')) {  | 
| 
167
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $tax = $nodesFile;  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
169
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
48
 | 
     open $tax, "<", $nodesFile or croak "$!";  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
171
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
185341
 | 
   while (<$tax>){  | 
| 
172
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1074233
 | 
     chomp;  | 
| 
173
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1242098
 | 
     $self -> _create_node(_parse_tax_rec($_));  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
175
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   $self -> _name_nodes();  | 
| 
176
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
37
 | 
   close $tax unless ((UNIVERSAL::isa($nodesFile, 'GLOB')) or (ref \$nodesFile eq 'GLOB'));  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _create_node {  | 
| 
180
 | 
560790
 | 
 
 | 
 
 | 
  
560790
  
 | 
 
 | 
1741602
 | 
   my ($self,$node,$parent,$level) = @_;  | 
| 
181
 | 
560790
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2118505
 | 
   $self->{allowed_levels}{$level} = 1 if (! defined $self->{allowed_levels}{$level});  | 
| 
182
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
915044
 | 
   @{$self->{nodes}->{$node}}{qw/parent level/} = ($parent,$level);  | 
| 
 
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15342675
 | 
    | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _name_nodes {  | 
| 
186
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
2
 | 
   my ($self) = @_;  | 
| 
187
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $namesFile = $self->{namesFile};  | 
| 
188
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
   my $nodesNames;  | 
| 
189
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
25
 | 
   if ((UNIVERSAL::isa($namesFile, 'GLOB')) or (ref \$namesFile eq 'GLOB')) {  | 
| 
190
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $nodesNames = $namesFile;  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
192
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
97
 | 
     open $nodesNames, "<", $namesFile or croak $!;  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
194
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1225
 | 
   while (<$nodesNames>){  | 
| 
195
 | 
806059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1498144
 | 
     chomp;  | 
| 
196
 | 
806059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1648567
 | 
     my ($taxId,$taxName,$comment) = _process_tax_name ($_);  | 
| 
197
 | 
806059
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15607913
 | 
     if ($comment eq "scientific name"){  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
779943
 | 
       ${$self->{nodes}->{$taxId}}{name} = $taxName;  | 
| 
 
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3379501
 | 
    | 
| 
199
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59234643
 | 
       $self->{names}->{$taxName} = $taxId;  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($comment eq "synonym") {  | 
| 
201
 | 
138436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4580984
 | 
       $self->{names}->{$taxName} = $taxId;  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
204
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
   close $nodesNames;  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _parse_tax_rec {  | 
| 
208
 | 
560790
 | 
 
 | 
 
 | 
  
560790
  
 | 
 
 | 
1321249
 | 
   my $line = shift @_;  | 
| 
209
 | 
560790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5700570
 | 
   return (split FS,$line)[0,1,2];  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _process_tax_name {  | 
| 
214
 | 
806059
 | 
 
 | 
 
 | 
  
806059
  
 | 
 
 | 
1736256
 | 
   my $line = shift @_;  | 
| 
215
 | 
806059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
102014963
 | 
   my @fields = split FS, $line;  | 
| 
216
 | 
806059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3233950
 | 
   $fields[3] =~ s/\t\|$//;  | 
| 
217
 | 
806059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4201086
 | 
   return ($fields[0],$fields[1],$fields[3]);  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_taxonomy_from_gi {  | 
| 
221
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
   my ($self,$gi) = @_;  | 
| 
222
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   croak "Undefined GI\n" unless (defined $gi);  | 
| 
223
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $taxid = $self->{dict}->get_taxid($gi);  | 
| 
224
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $self->get_taxonomy($taxid);  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_taxonomy_with_levels_from_gi {  | 
| 
228
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
   my ($self,$gi) = @_;  | 
| 
229
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   croak "Undefined GI\n" unless (defined $gi);  | 
| 
230
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $taxid = $self->{dict}->get_taxid($gi);  | 
| 
231
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $self->get_taxonomy_with_levels($taxid);  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_term_at_level_from_gi {  | 
| 
235
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
   my ($self,$gi,$level) = @_;  | 
| 
236
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   croak "Undefined GI\n" unless (defined $gi);  | 
| 
237
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   croak "Undefined Level\n" unless (defined $level);  | 
| 
238
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $taxid = $self->{dict}->get_taxid($gi);  | 
| 
239
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $self->get_term_at_level($taxid,$level);  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Note: Use methods in Gi2taxid as if they were from here  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub AUTOLOAD {  | 
| 
244
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
   my ($self,$args) = @_;  | 
| 
245
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   our $AUTOLOAD;  | 
| 
246
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $method = $AUTOLOAD;  | 
| 
247
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $method =~ s/.*:://;  | 
| 
248
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self->{dict}->can($method) or croak "$method not defined in package __PACKAGE__\n";  | 
| 
249
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return $self->{dict}->$method($args);  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Needed to dont call AUTLOAD on object destruction  | 
| 
253
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 sub DESTROY     { }  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  |