File Coverage

Bio/DB/Taxonomy/silva.pm
Criterion Covered Total %
statement 31 31 100.0
branch 8 10 80.0
condition n/a
subroutine 5 5 100.0
pod 1 1 100.0
total 45 47 95.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::DB::Taxonomy::silva
3             #
4             # Please direct questions and support issues to <bioperl-l@bioperl.org>
5             #
6             # Copyright Florent Angly
7             #
8             # You may distribute this module under the same terms as perl itself
9              
10              
11             =head1 NAME
12              
13             Bio::DB::Taxonomy::silva - Use the Silva taxonomy
14              
15             =head1 SYNOPSIS
16              
17             use Bio::DB::Taxonomy;
18              
19             my $db = Bio::DB::Taxonomy->new(
20             -source => 'silva',
21             -taxofile => 'SSURef_108_tax_silva_trunc.fasta',
22             );
23              
24             =head1 DESCRIPTION
25              
26             This is an implementation of Bio::DB::Taxonomy which stores and accesses the
27             Silva taxonomy. Internally, Bio::DB::Taxonomy::silva keeps the taxonomy
28             in memory by using Bio::DB::Taxonomy::list. As a consequence, note that the
29             IDs assigned to the taxonomy nodes, e.g. sv72, are arbitrary, contrary to the
30             pre-defined IDs that NCBI assigns to taxa. Note also that no rank names or
31             common names are assigned to the taxa of Bio::DB::Taxonomy::silva.
32              
33             The latest Silva taxonomy (2011) contains about 126,000 taxa and occupies
34             about 124 MB of memory once parsed into a Bio::DB::Taxonomy::silva object.
35             Obviously, it can take a little while to load.
36              
37             The taxonomy file SSURef_108_tax_silva_trunc.fasta that this module uses is
38             available from L<http://www.arb-silva.de/no_cache/download/archive/release_108/Exports/>.
39              
40             =head1 FEEDBACK
41              
42             =head2 Mailing Lists
43              
44             User feedback is an integral part of the evolution of this and other
45             Bioperl modules. Send your comments and suggestions preferably to
46             the Bioperl mailing list. Your participation is much appreciated.
47              
48             bioperl-l@bioperl.org - General discussion
49             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50              
51             =head2 Support
52              
53             Please direct usage questions or support issues to the mailing list:
54              
55             I<bioperl-l@bioperl.org>
56              
57             rather than to the module maintainer directly. Many experienced and
58             responsive experts will be able to look at the problem and quickly
59             address it. Please include a thorough description of the problem
60             with code and data examples if at all possible.
61              
62             =head2 Reporting Bugs
63              
64             Report bugs to the Bioperl bug tracking system to help us keep track
65             of the bugs and their resolution. Bug reports can be submitted via
66             the web:
67              
68             https://github.com/bioperl/bioperl-live/issues
69              
70             =head1 AUTHOR - Florent Angly
71              
72             florent.angly@gmail.com
73              
74             =head1 APPENDIX
75              
76             The rest of the documentation details each of the object methods.
77             Internal methods are usually preceded with a _
78              
79             =cut
80              
81              
82             package Bio::DB::Taxonomy::silva;
83              
84 1     1   4 use strict;
  1         1  
  1         23  
85 1     1   322 use Bio::SeqIO;
  1         3  
  1         29  
86              
87 1     1   4 use base qw(Bio::DB::Taxonomy Bio::DB::Taxonomy::list);
  1         1  
  1         372  
88              
89             $Bio::DB::Taxonomy::list::prefix = 'sv';
90              
91              
92             =head2 new
93              
94             Title : new
95             Usage : my $obj = Bio::DB::Taxonomy::silva->new();
96             Function: Builds a new Bio::DB::Taxonomy::silva object
97             Returns : an instance of Bio::DB::Taxonomy::silva
98             Args : -taxofile => name of the FASTA file containing the taxonomic information,
99             typically 'SSURef_108_tax_silva_trunc.fasta' (mandatory)
100              
101             =cut
102              
103             sub new {
104             # Override Bio::DB::Taxonomy
105 2     2 1 3 my($class, @args) = @_;
106 2         9 my $self = $class->SUPER::new(@args);
107 2         11 my ($taxofile) = $self->_rearrange([qw(TAXOFILE)], @args);
108            
109 2 100       5 if ( $taxofile ) {
110 1         2 $self = $self->_build_taxonomy($taxofile);
111             }
112              
113 2         16 return $self;
114             }
115              
116              
117             sub _build_taxonomy {
118 1     1   2 my ($self, $taxofile) = @_;
119              
120 1         3 my $taxonomy = Bio::DB::Taxonomy::list->new();
121 1         2 my %taxas;
122 1         8 my $desc_re = qr/^>\S+?(?:\s+(.+))?$/;
123              
124             # One could open the file using Bio::SeqIO::fasta, but it is slower and we
125             # only need the sequence descriptions
126              
127 1 50       39 open my $in, '<', $taxofile or $self->throw("Could not read file '$taxofile': $!");
128              
129             # Populate taxonomy with taxonomy obtained from sequence description
130 1         18 while (my $line = <$in>) {
131              
132 162 100       679 next if $line !~ $desc_re;
133 57         84 my $taxo_string = $1;
134 57 50       65 next if not $taxo_string;
135              
136             # Example of taxonomy string:
137             # 1/ Bacteria;Firmicutes;Bacilli;Lactobacillales;Enterococcaceae;Enterococcus;Enterococcus faecium DO
138             # 2/ Eukaryota;Metazoa;Chordata;Craniata;Vertebrata;Euteleostomi;Mammalia;Eutheria;Euarchontoglires;Glires;
139             # Rodentia;Sciurognathi;Muroidea;Muridae;Murinae;Rattus;;Rattus norvegicus (Norway rat)
140            
141             # Skip already seen taxas
142 57 100       126 next if exists $taxas{$taxo_string};
143 42         68 $taxas{$taxo_string} = undef;
144              
145             # Strip the common name (could save it if Bio::DB::Taxonomy::list supported it)
146 42         56 $taxo_string =~ s/ \(.*\)$//;
147              
148             # Save lineage
149             # Unfortunately, we cannot easily add ranks since they vary from 2 to 23 for every entry
150 42         137 my @names = split /;/, $taxo_string;
151 42         103 $taxonomy->add_lineage(
152             -names => \@names,
153             );
154              
155             }
156              
157 1         9 close $in;
158              
159 1         11 return $taxonomy;
160             }
161              
162              
163             1;