File Coverage

Bio/Species.pm
Criterion Covered Total %
statement 140 192 72.9
branch 60 92 65.2
condition 29 48 60.4
subroutine 22 45 48.8
pod 17 38 44.7
total 268 415 64.5


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Species
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by James Gilbert
7             # Reimplemented by Sendu Bala
8             # Re-reimplemented by Chris Fields
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Species - Generic species object.
17              
18             =head1 SYNOPSIS
19              
20             $species = Bio::Species->new(-classification => [@classification]);
21             # Can also pass classification
22             # array to new as below
23              
24             $species->classification(qw( sapiens Homo Hominidae
25             Catarrhini Primates Eutheria
26             Mammalia Vertebrata Chordata
27             Metazoa Eukaryota ));
28              
29             $genus = $species->genus();
30              
31             $bi = $species->binomial(); # $bi is now "Homo sapiens"
32              
33             # For storing common name
34             $species->common_name("human");
35              
36             # For storing subspecies
37             $species->sub_species("accountant");
38              
39             =head1 DESCRIPTION
40              
41             B
42             Please use that class instead.>
43              
44             Provides a very simple object for storing phylogenetic information. The
45             classification is stored in an array, which is a list of nodes in a phylogenetic
46             tree. Access to getting and setting species and genus is provided, but not to
47             any of the other node types (eg: "phylum", "class", "order", "family"). There's
48             plenty of scope for making the model more sophisticated, if this is ever needed.
49              
50             A methods are also provided for storing common names, and subspecies.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to
58             the Bioperl mailing list. Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             of the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             James Gilbert email B
85              
86             =head1 CONTRIBUTORS
87              
88             Sendu Bala, bix@sendu.me.uk
89             Chris Fields, cjfields at bioperl dot org
90              
91             =head1 APPENDIX
92              
93             The rest of the documentation details each of the object
94             methods. Internal methods are usually preceded with a _
95              
96             =cut
97              
98             #' Let the code begin...
99              
100             package Bio::Species;
101 42     42   1796 use strict;
  42         77  
  42         1087  
102 42     42   188 use warnings;
  42         60  
  42         1188  
103              
104 42     42   11473 use Bio::DB::Taxonomy;
  42         95  
  42         1172  
105 42     42   253 use Bio::Tree::Tree;
  42         71  
  42         688  
106 42     42   15228 use Bio::Taxon;
  42         88  
  42         1291  
107 42     42   284 use base qw(Bio::Root::Root Bio::Tree::NodeI);
  42         72  
  42         75994  
108              
109             =head2 new
110              
111             Title : new
112             Usage : my $obj = Bio::Species->new(-classification => \@class)
113             Function: Build a new Species object
114             Returns : Bio::Species object
115             Args : -ncbi_taxid => NCBI taxonomic ID (optional)
116             -classification => arrayref of classification
117              
118             =cut
119              
120             sub new {
121 251     251 1 61315 my($class, @args) = @_;
122            
123 251         1100 my $self = $class->SUPER::new(@args);
124            
125             # Bio::Species is now just a proxy object that just observes the NodeI
126             # interface methods but delegates them to the proper classes (Bio::Taxon and
127             # Bio::Tree::Tree). This will be surplanted by the much simpler
128             # Bio::Taxon/Bio::DB::Taxonomy modules in the future.
129            
130             # Using a proxy allows proper GC w/o using weaken(). This just wraps the
131             # older instances, which have no reciprocal refs (thus no circular refs).
132             # This can then run proper cleanup
133            
134 251         1847 $self->taxon(Bio::Taxon->new(@args));
135            
136 251         1291 my ($org, $sp, $var, $classification) =
137             $self->_rearrange([qw(ORGANELLE
138             SUB_SPECIES
139             VARIANT
140             CLASSIFICATION)], @args);
141            
142 251 100 66     1283 if (defined $classification && ref($classification) eq "ARRAY" && @{$classification}) {
  16   66     58  
143 16         56 $self->classification(@$classification);
144             }
145             else {
146 235         1836 $self->tree(Bio::Tree::Tree->new());
147             }
148            
149 251 50       673 defined $org && $self->organelle($org);
150 251 100       680 defined $sp && $self->sub_species($sp);
151 251 50       675 defined $var && $self->variant($var);
152            
153 251         686 return $self;
154             }
155              
156             =head2 classification
157              
158             Title : classification
159             Usage : $self->classification(@class_array);
160             @classification = $self->classification();
161             Function: Get/set the lineage of this species. The array provided must be in
162             the order ... ---> SPECIES, GENUS ---> KINGDOM ---> etc.
163             Example : $obj->classification(qw( 'Homo sapiens' Homo Hominidae
164             Catarrhini Primates Eutheria Mammalia Vertebrata
165             Chordata Metazoa Eukaryota));
166             Returns : Classification array
167             Args : Classification array
168             OR
169             A reference to the classification array. In the latter case
170             if there is a second argument and it evaluates to true,
171             names will not be validated. NB: in any case, names are never
172             validated anyway.
173              
174             =cut
175              
176             sub classification {
177 305     305 1 1206 my ($self, @vals) = @_;
178              
179 305         818 my $taxon = $self->taxon;
180              
181 305 100       865 if (@vals) {
182 252 100       902 if (ref($vals[0]) eq 'ARRAY') {
183 2         4 @vals = @{$vals[0]};
  2         8  
184             }
185            
186 252   100     672 $vals[1] ||= '';
187             # make sure the lineage contains us as first or second element
188             # (lineage may have subspecies, species, genus ...)
189 252         770 my $name = $taxon->node_name;
190 252         1072 my ($genus, $species) = (quotemeta($vals[1]), quotemeta($vals[0]));
191 252 50 33     3894 if ($name &&
      66        
      33        
192             ($name !~ m{$species}i && $name !~ m{$genus}i) &&
193             $name !~ m{$genus $species}i) {
194 0 0       0 if ($name =~ /^$genus $species\s*(.+)/) {
195             # just assume the problem is someone tried to make a Bio::Species starting at subspecies
196             #*** no idea if this is appropriate! just a possible fix related to bug 2092
197 0         0 $self->sub_species($1);
198 0         0 $name = $taxon->node_name("$vals[1] $vals[0]");
199             }
200             else {
201 0         0 $self->warn("The supplied lineage does not start near '$name' (I was supplied '".join(" | ", @vals)."')");
202             }
203             }
204            
205             # create a lineage for ourselves
206 252         2910 my $db = Bio::DB::Taxonomy->new(-source => 'list', -names => [reverse @vals]);
207 252 100       1173 unless ($taxon->scientific_name) {
208             # assume we're supposed to be the leaf of the supplied lineage
209 42         104 $self->taxon->scientific_name($vals[0]);
210             }
211 252 100       1023 unless ($taxon->rank) {
212             # and that we are rank species
213 250         613 $taxon->rank('species');
214             }
215            
216 252         1077 $taxon->db_handle($db);
217            
218 252         1309 $self->tree(Bio::Tree::Tree->new(-node => $taxon));
219             }
220            
221 305         1377 @vals = ();
222 305         851 foreach my $node ($self->tree->get_lineage_nodes($taxon), $taxon) {
223 2864   100     4575 unshift(@vals, $node->scientific_name || next);
224             }
225 305         1479 return @vals;
226             }
227              
228             =head2 ncbi_taxid
229              
230             Title : ncbi_taxid
231             Usage : $obj->ncbi_taxid($newval)
232             Function: Get/set the NCBI Taxon ID
233             Returns : the NCBI Taxon ID as a string
234             Args : newvalue to set or undef to unset (optional)
235              
236             =cut
237              
238             =head2 common_name
239              
240             Title : common_name
241             Usage : $self->common_name( $common_name );
242             $common_name = $self->common_name();
243             Function: Get or set the common name of the species
244             Example : $self->common_name('human')
245             Returns : The common name in a string
246             Args : String, which is the common name (optional)
247              
248             =cut
249              
250             =head2 division
251              
252             Title : division
253             Usage : $obj->division($newval)
254             Function: Genbank Division for a species
255             Returns : value of division (a scalar)
256             Args : value of division (a scalar)
257              
258             =cut
259              
260             =head2 species
261              
262             Title : species
263             Usage : $self->species( $species );
264             $species = $self->species();
265             Function: Get or set the species name.
266             Note that this is NOT genus and species
267             -- use $self->binomial() for that.
268             Example : $self->species('sapiens');
269             Returns : species name as string (NOT genus and species)
270             Args : species name as string (NOT genus and species)
271              
272             =cut
273              
274             sub species {
275 130     130 1 309 my ($self, $species) = @_;
276            
277 130 100       310 if ($species) {
278 11         47 $self->{_species} = $species;
279             }
280              
281 130 100       351 unless (defined $self->{_species}) {
282             # work it out from our nodes
283 61         194 my $species_taxon = $self->tree->find_node(-rank => 'species');
284 61 50       181 unless ($species_taxon) {
285             # just assume we are rank species
286 0         0 $species_taxon = $self->taxon;
287             }
288              
289 61         368 $species = $species_taxon->scientific_name;
290            
291             #
292             # munge it like the Bio::SeqIO modules used to do
293             # (more or less copy/pasted from old Bio::SeqIO::genbank, hence comments
294             # referring to 'ORGANISM' etc.)
295             #
296              
297 61         182 my $root = $self->tree->get_root_node;
298 61 50       202 unless ($root) {
299 0         0 $self->tree(Bio::Tree::Tree->new(-node => $species_taxon));
300 0         0 $root = $self->tree->get_root_node;
301             }
302            
303 61         241 my @spflds = split(' ', $species);
304 61 100 66     276 if (@spflds > 1 && $root->node_name ne 'Viruses') {
305 43         83 $species = undef;
306              
307             # does the next term start with uppercase?
308             # yes: valid genus; no then unconventional
309             # e.g. leaf litter basidiomycete sp. Collb2-39
310 43         77 my $genus;
311 43 50       237 if ($spflds[0] =~ m/^[A-Z]/) {
312 43         97 $genus = shift(@spflds);
313             }
314             else {
315 0         0 undef $genus;
316             }
317            
318 43         67 my $sub_species;
319 43 50       150 if (@spflds) {
320 43         170 while (my $fld = shift @spflds) {
321 45         144 $species .= "$fld ";
322             # does it have subspecies or varieties?
323 45 100       314 last if ($fld =~ m/(sp\.|var\.)/);
324             }
325 43         119 chop $species; # last space
326 43 100       123 $sub_species = join ' ',@spflds if(@spflds);
327             }
328             else {
329 0         0 $species = 'sp.';
330             }
331              
332             # does ORGANISM start with any words which make its genus undefined?
333             # these are in @unkn_genus
334             # this in case species starts with uppercase so isn't caught above.
335             # alter common name if required
336 43         88 my $unconv = 0; # is it unconventional species name?
337 43         147 my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
338 43         115 foreach (@unkn_genus) {
339 172 50 33     2204 if ($genus && $genus =~ m/$_/i) {
    50          
340 0         0 $species = $genus . " " . $species;
341 0         0 undef $genus;
342 0         0 $unconv = 1;
343 0         0 last;
344             }
345             elsif ($species =~ m/$_/i) {
346 0         0 $unconv = 1;
347 0         0 last;
348             }
349             }
350 43 50 66     451 if (!$unconv && !$sub_species && $species =~ s/^(\w+)\s(\w+)$/$1/) {
      66        
351             # need to extract subspecies from conventional ORGANISM format.
352             # Will the 'word' in a two element species name
353             # e.g. $species = 'thummi thummi' => $species='thummi' &
354             # $sub_species='thummi'
355 0         0 $sub_species = $2;
356             }
357              
358 43 50       242 $self->genus($genus) if $genus;
359 43 100       132 $self->sub_species($sub_species) if $sub_species;
360             }
361              
362 61         191 $self->{_species} = $species;
363             }
364 130         387 return $self->{_species};
365             }
366              
367             =head2 genus
368              
369             Title : genus
370             Usage : $self->genus( $genus );
371             $genus = $self->genus();
372             Function: Get or set the scientific genus name.
373             Example : $self->genus('Homo');
374             Returns : Scientific genus name as string
375             Args : Scientific genus name as string
376              
377             =cut
378              
379             sub genus {
380 190     190 1 393 my ($self, $genus) = @_;
381              
382             # TODO: instead of caching the raw name, cache the actual node instance.
383 190 100       428 if ($genus) {
384 64         175 $self->{_genus} = $genus;
385             }
386 190 100       459 unless (defined $self->{_genus}) {
387 18         55 my $genus_taxon = $self->tree->find_node(-rank => 'genus');
388 18 50       61 unless ($genus_taxon) {
389             # just assume our ancestor is rank genus
390 18         60 $genus_taxon = $self->taxon->ancestor;
391             }
392            
393 18 50       136 $self->{_genus} = $genus_taxon->scientific_name if $genus_taxon;
394             }
395            
396 190         435 return $self->{_genus};
397             }
398              
399             =head2 sub_species
400              
401             Title : sub_species
402             Usage : $obj->sub_species($newval)
403             Function: Get or set the scientific subspecies name.
404             Returns : value of sub_species
405             Args : newvalue (optional)
406              
407             =cut
408              
409             sub sub_species {
410 10     10 1 22 my ($self, $sub) = @_;
411            
412             # TODO: instead of caching the raw name, cache the actual node instance.
413 10 100       29 if (!defined $self->{'_sub_species'}) {
414 6         17 my $ss_taxon = $self->tree->find_node(-rank => 'subspecies');
415 6 50       25 if ($ss_taxon) {
416 0 0       0 if ($sub) {
417 0         0 $ss_taxon->scientific_name($sub);
418            
419             # *** weakening ref to our root node in species() to solve a
420             # memory leak means that we have a subspecies taxon to set
421             # during the first call to species(), but it has vanished by
422             # the time a user subsequently calls sub_species() to get the
423             # value. So we 'cheat' and just store the subspecies name in
424             # our self hash, instead of the tree. Is this a problem for
425             # a Species object? Can't decide --sendu
426            
427             # This can now be changed to deal with this information on the
428             # fly. For now, the caching remains, but maybe we should just
429             # let these things deal with mutable data as needed? -- cjfields
430            
431 0         0 $self->{'_sub_species'} = $sub;
432             }
433 0         0 return $ss_taxon->scientific_name;
434             }
435             else {
436             # should we create a node here to be added to the tree?
437             }
438             }
439            
440             # fall back to direct storage on self
441 10 100       30 $self->{'_sub_species'} = $sub if $sub;
442 10         22 return $self->{'_sub_species'};
443             }
444              
445             =head2 variant
446              
447             Title : variant
448             Usage : $obj->variant($newval)
449             Function: Get/set variant information for this species object (strain,
450             isolate, etc).
451             Example :
452             Returns : value of variant (a scalar)
453             Args : new value (a scalar or undef, optional)
454              
455             =cut
456              
457             sub variant{
458 3     3 1 10 my ($self, $var) = @_;
459            
460             # TODO: instead of caching the raw name, cache the actual node instance.
461 3 50       15 if (!defined $self->{'_variant'}) {
462 3         12 my $var_taxon = $self->tree->find_node(-rank => 'variant');
463 3 50       14 if ($var_taxon) {
464 0 0       0 if ($var) {
465 0         0 $var_taxon->scientific_name($var);
466             }
467 0         0 return $var_taxon->scientific_name;
468             }
469             else {
470             # should we create a node here to be added to the tree?
471             }
472             }
473            
474             # fall back to direct storage on self
475 3 50       13 $self->{'_variant'} = $var if $var;
476 3         16 return $self->{'_variant'};
477             }
478              
479             =head2 binomial
480              
481             Title : binomial
482             Usage : $binomial = $self->binomial();
483             $binomial = $self->binomial('FULL');
484             Function: Returns a string "Genus species", or "Genus species subspecies",
485             if the first argument is 'FULL' (and the species has a subspecies).
486             Args : Optionally the string 'FULL' to get the full name including
487             the subspecies.
488             Note : This is just munged from the taxon() name
489              
490             =cut
491              
492             sub binomial {
493 108     108 1 282 my ($self, $full) = @_;
494 108   50     351 my $rank = $self->taxon->rank || 'no rank';
495            
496 108         393 my ($species, $genus) = ($self->species, $self->genus);
497 108 50       280 unless (defined $species) {
498 0         0 $species = 'sp.';
499 0         0 $self->warn("requested binomial but classification was not set");
500             }
501 108 50       232 $genus = '' unless( defined $genus);
502            
503 108         1166 $species =~ s/$genus\s+//;
504            
505 108         314 my $bi = "$genus $species";
506 108 100 66     336 if (defined($full) && $full =~ /full/i) {
507 3         7 my $ssp = $self->sub_species;
508 3 50       9 if ($ssp) {
509 3         35 $ssp =~ s/$bi\s+//;
510 3         29 $ssp =~ s/$species\s+//;
511 3         10 $bi .= " $ssp";
512             }
513             }
514 108         475 return $bi;
515             }
516              
517             =head2 validate_species_name
518              
519             Title : validate_species_name
520             Usage : $result = $self->validate_species_name($string);
521             Function: Validate the species portion of the binomial
522             Args : string
523             Notes : The string following the "genus name" in the NCBI binomial is so
524             variable that it's not clear that this is a useful function. Consider
525             the binomials "Simian 11 rotavirus (serotype 3 / strain
526             SA11-Patton)", or "St. Thomas 3 rotavirus", straight from GenBank.
527             This is particularly problematic in microbes and viruses. As such,
528             this isn't actually used automatically by any Bio::Species method.
529              
530             =cut
531              
532             sub validate_species_name {
533 0     0 1 0 my( $self, $string ) = @_;
534              
535 0 0       0 return 1 if $string eq "sp.";
536 0 0       0 return 1 if $string =~ /strain/;
537 0 0       0 return 1 if $string =~ /^[a-z][\w\s-]+$/i;
538 0         0 $self->throw("Invalid species name '$string'");
539             }
540              
541             sub validate_name {
542 0     0 0 0 return 1;
543             }
544              
545             =head2 organelle
546              
547             Title : organelle
548             Usage : $self->organelle( $organelle );
549             $organelle = $self->organelle();
550             Function: Get or set the organelle name
551             Example : $self->organelle('Chloroplast')
552             Returns : The organelle name in a string
553             Args : String, which is the organelle name
554             Note : TODO: We currently do not know where the organelle definition will
555             eventually go. This is stored in the source seqfeature, though,
556             so the information isn't lost.
557              
558             =cut
559              
560             sub organelle {
561 42     42 1 111 my($self) = shift;
562 42 100       152 return $self->{'_organelle'} = shift if @_;
563 31         128 return $self->{'_organelle'};
564             }
565              
566             =head2 Delegation
567              
568             The following methods delegate to the internal Bio::Taxon instance. This is
569             mainly to allow code continue using older methods, with the mind to migrate to
570             using Bio::Taxon and related methods when this class is deprecated.
571              
572             =cut
573              
574 0     0 0 0 sub node_name {shift->taxon->node_name(@_)}
575 268     268 0 673 sub scientific_name {shift->taxon->node_name(@_)}
576              
577 0     0 1 0 sub id {shift->taxon->id(@_)}
578 0     0 0 0 sub object_id {shift->taxon->id(@_)}
579 361     361 1 2075 sub ncbi_taxid {shift->taxon->ncbi_taxid(@_)}
580 1     1 0 3 sub rank {shift->taxon->rank(@_)}
581 0     0 1 0 sub division {shift->taxon->division(@_)}
582              
583 0     0 0 0 sub common_names {shift->taxon->common_names(@_)}
584 128     128 1 518 sub common_name {shift->taxon->common_names(@_)}
585              
586 0     0 0 0 sub genetic_code {shift->taxon->genetic_code(@_)}
587 0     0 0 0 sub mitochondrial_genetic_code {shift->taxon->mitochondrial_genetic_code(@_)}
588              
589 0     0 0 0 sub create_date { shift->taxon->create_date(@_)}
590 0     0 0 0 sub pub_date { shift->taxon->pub_date(@_)}
591 0     0 0 0 sub update_date { shift->taxon->update_date(@_)}
592              
593 0     0 0 0 sub db_handle { shift->taxon->db_handle(@_)}
594              
595 0     0 0 0 sub parent_id { shift->taxon->parent_id(@_)}
596 0     0 0 0 sub parent_taxon_id { shift->taxon->parent_id(@_)}
597              
598 0     0 0 0 sub version { shift->taxon->version(@_)}
599 0     0 0 0 sub authority { shift->taxon->authority(@_)}
600 0     0 0 0 sub namespace { shift->taxon->namespace(@_)}
601              
602 0     0 1 0 sub ancestor { shift->taxon->ancestor(@_)}
603 0     0 0 0 sub get_Parent_Node { shift->taxon->get_Parent_Node(@_)}
604 0     0 1 0 sub each_Descendent { shift->taxon->each_Descendent(@_)}
605 0     0 0 0 sub get_Children_Nodes { shift->taxon->get_Children_Nodes(@_)}
606 0     0 0 0 sub remove_Descendant { shift->taxon->remove_Descendant(@_)}
607              
608 213     213 0 635 sub name { shift->taxon->name(@_)}
609              
610             =head2 taxon
611              
612             Title : taxon
613             Usage : $obj->taxon
614             Function : retrieve the internal Bio::Taxon instance
615             Returns : A Bio::Taxon. If one is not previously set,
616             an instance is created lazily
617             Args : Bio::Taxon (optional)
618            
619             =cut
620              
621             sub taxon {
622 1984     1984 1 3292 my ($self, $taxon) = @_;
623 1984 100 66     8503 if (!$self->{taxon} || $taxon) {
624 251   33     651 $taxon ||= Bio::Taxon->new();
625 251         644 $self->{taxon} = $taxon;
626             }
627 1984         5823 $self->{taxon};
628             }
629              
630             =head2 tree
631              
632             Title : tree
633             Usage : $obj->tree
634             Function : Returns a Bio::Tree::Tree object
635             Returns : A Bio::Tree::Tree. If one is not previously set,
636             an instance is created lazily
637             Args : Bio::Tree::Tree (optional)
638            
639             =cut
640              
641             sub tree {
642 1230     1230 1 2476 my ($self, $tree) = @_;
643 1230 100 100     5627 if (!$self->{tree} || $tree) {
644 487   33     1175 $tree ||= Bio::Tree::Tree->new();
645 487         1110 delete $tree->{_root_cleanup_methods};
646 487         1351 $self->{tree} = $tree;
647             }
648 1230         4733 $self->{tree};
649             }
650              
651             sub DESTROY {
652 289     289   18299 my $self = shift;
653 289         1409 $self->tree->cleanup_tree;
654 289         1460 delete $self->{tree};
655 289         1022 $self->taxon->node_cleanup;
656             }
657              
658             1;