File Coverage

Bio/Species.pm
Criterion Covered Total %
statement 140 192 72.9
branch 60 92 65.2
condition 27 48 56.2
subroutine 22 45 48.8
pod 17 38 44.7
total 266 415 64.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Species
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by James Gilbert
7             # Reimplemented by Sendu Bala
8             # Re-reimplemented by Chris Fields
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Species - Generic species object.
17              
18             =head1 SYNOPSIS
19              
20             $species = Bio::Species->new(-classification => [@classification]);
21             # Can also pass classification
22             # array to new as below
23              
24             $species->classification(qw( sapiens Homo Hominidae
25             Catarrhini Primates Eutheria
26             Mammalia Vertebrata Chordata
27             Metazoa Eukaryota ));
28              
29             $genus = $species->genus();
30              
31             $bi = $species->binomial(); # $bi is now "Homo sapiens"
32              
33             # For storing common name
34             $species->common_name("human");
35              
36             # For storing subspecies
37             $species->sub_species("accountant");
38              
39             =head1 DESCRIPTION
40              
41             B
42             Please use that class instead.>
43              
44             Provides a very simple object for storing phylogenetic information. The
45             classification is stored in an array, which is a list of nodes in a phylogenetic
46             tree. Access to getting and setting species and genus is provided, but not to
47             any of the other node types (eg: "phylum", "class", "order", "family"). There's
48             plenty of scope for making the model more sophisticated, if this is ever needed.
49              
50             A methods are also provided for storing common names, and subspecies.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to
58             the Bioperl mailing list. Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             of the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             James Gilbert email B
85              
86             =head1 CONTRIBUTORS
87              
88             Sendu Bala, bix@sendu.me.uk
89             Chris Fields, cjfields at bioperl dot org
90              
91             =head1 APPENDIX
92              
93             The rest of the documentation details each of the object
94             methods. Internal methods are usually preceded with a _
95              
96             =cut
97              
98             #' Let the code begin...
99              
100             package Bio::Species;
101 42     42   1285 use strict;
  42         47  
  42         944  
102 42     42   128 use warnings;
  42         44  
  42         1126  
103              
104 42     42   12437 use Bio::DB::Taxonomy;
  42         68  
  42         920  
105 42     42   186 use Bio::Tree::Tree;
  42         50  
  42         550  
106 42     42   14961 use Bio::Taxon;
  42         65  
  42         1266  
107 42     42   204 use base qw(Bio::Root::Root Bio::Tree::NodeI);
  42         45  
  42         69111  
108              
109             =head2 new
110              
111             Title : new
112             Usage : my $obj = Bio::Species->new(-classification => \@class)
113             Function: Build a new Species object
114             Returns : Bio::Species object
115             Args : -ncbi_taxid => NCBI taxonomic ID (optional)
116             -classification => arrayref of classification
117              
118             =cut
119              
120             sub new {
121 249     249 1 690 my($class, @args) = @_;
122            
123 249         848 my $self = $class->SUPER::new(@args);
124            
125             # Bio::Species is now just a proxy object that just observes the NodeI
126             # interface methods but delegates them to the proper classes (Bio::Taxon and
127             # Bio::Tree::Tree). This will be surplanted by the much simpler
128             # Bio::Taxon/Bio::DB::Taxonomy modules in the future.
129            
130             # Using a proxy allows proper GC w/o using weaken(). This just wraps the
131             # older instances, which have no reciprocal refs (thus no circular refs).
132             # This can then run proper cleanup
133            
134 249         1778 $self->taxon(Bio::Taxon->new(@args));
135            
136 249         1039 my ($org, $sp, $var, $classification) =
137             $self->_rearrange([qw(ORGANELLE
138             SUB_SPECIES
139             VARIANT
140             CLASSIFICATION)], @args);
141            
142 249 100 66     979 if (defined $classification && ref($classification) eq "ARRAY" && @{$classification}) {
  14   66     46  
143 14         47 $self->classification(@$classification);
144             }
145             else {
146 235         1481 $self->tree(Bio::Tree::Tree->new());
147             }
148            
149 249 50       560 defined $org && $self->organelle($org);
150 249 100       509 defined $sp && $self->sub_species($sp);
151 249 50       529 defined $var && $self->variant($var);
152            
153 249         574 return $self;
154             }
155              
156             =head2 classification
157              
158             Title : classification
159             Usage : $self->classification(@class_array);
160             @classification = $self->classification();
161             Function: Get/set the lineage of this species. The array provided must be in
162             the order ... ---> SPECIES, GENUS ---> KINGDOM ---> etc.
163             Example : $obj->classification(qw( 'Homo sapiens' Homo Hominidae
164             Catarrhini Primates Eutheria Mammalia Vertebrata
165             Chordata Metazoa Eukaryota));
166             Returns : Classification array
167             Args : Classification array
168             OR
169             A reference to the classification array. In the latter case
170             if there is a second argument and it evaluates to true,
171             names will not be validated. NB: in any case, names are never
172             validated anyway.
173              
174             =cut
175              
176             sub classification {
177 303     303 1 869 my ($self, @vals) = @_;
178              
179 303         599 my $taxon = $self->taxon;
180              
181 303 100       789 if (@vals) {
182 250 100       673 if (ref($vals[0]) eq 'ARRAY') {
183 2         3 @vals = @{$vals[0]};
  2         14  
184             }
185            
186 250   50     510 $vals[1] ||= '';
187             # make sure the lineage contains us as first or second element
188             # (lineage may have subspecies, species, genus ...)
189 250         551 my $name = $taxon->node_name;
190 250         725 my ($genus, $species) = (quotemeta($vals[1]), quotemeta($vals[0]));
191 250 50 33     3413 if ($name &&
      66        
      33        
192             ($name !~ m{$species}i && $name !~ m{$genus}i) &&
193             $name !~ m{$genus $species}i) {
194 0 0       0 if ($name =~ /^$genus $species\s*(.+)/) {
195             # just assume the problem is someone tried to make a Bio::Species starting at subspecies
196             #*** no idea if this is appropriate! just a possible fix related to bug 2092
197 0         0 $self->sub_species($1);
198 0         0 $name = $taxon->node_name("$vals[1] $vals[0]");
199             }
200             else {
201 0         0 $self->warn("The supplied lineage does not start near '$name' (I was supplied '".join(" | ", @vals)."')");
202             }
203             }
204            
205             # create a lineage for ourselves
206 250         2396 my $db = Bio::DB::Taxonomy->new(-source => 'list', -names => [reverse @vals]);
207 250 100       970 unless ($taxon->scientific_name) {
208             # assume we're supposed to be the leaf of the supplied lineage
209 40         75 $self->taxon->scientific_name($vals[0]);
210             }
211 250 100       780 unless ($taxon->rank) {
212             # and that we are rank species
213 248         539 $taxon->rank('species');
214             }
215            
216 250         683 $taxon->db_handle($db);
217            
218 250         1233 $self->tree(Bio::Tree::Tree->new(-node => $taxon));
219             }
220            
221 303         1415 @vals = ();
222 303         689 foreach my $node ($self->tree->get_lineage_nodes($taxon), $taxon) {
223 2851   50     3667 unshift(@vals, $node->scientific_name || next);
224             }
225 303         1099 return @vals;
226             }
227              
228             =head2 ncbi_taxid
229              
230             Title : ncbi_taxid
231             Usage : $obj->ncbi_taxid($newval)
232             Function: Get/set the NCBI Taxon ID
233             Returns : the NCBI Taxon ID as a string
234             Args : newvalue to set or undef to unset (optional)
235              
236             =cut
237              
238             =head2 common_name
239              
240             Title : common_name
241             Usage : $self->common_name( $common_name );
242             $common_name = $self->common_name();
243             Function: Get or set the common name of the species
244             Example : $self->common_name('human')
245             Returns : The common name in a string
246             Args : String, which is the common name (optional)
247              
248             =cut
249              
250             =head2 division
251              
252             Title : division
253             Usage : $obj->division($newval)
254             Function: Genbank Division for a species
255             Returns : value of division (a scalar)
256             Args : value of division (a scalar)
257              
258             =cut
259              
260             =head2 species
261              
262             Title : species
263             Usage : $self->species( $species );
264             $species = $self->species();
265             Function: Get or set the species name.
266             Note that this is NOT genus and species
267             -- use $self->binomial() for that.
268             Example : $self->species('sapiens');
269             Returns : species name as string (NOT genus and species)
270             Args : species name as string (NOT genus and species)
271              
272             =cut
273              
274             sub species {
275 130     130 1 186 my ($self, $species) = @_;
276            
277 130 100       266 if ($species) {
278 11         25 $self->{_species} = $species;
279             }
280              
281 130 100       291 unless (defined $self->{_species}) {
282             # work it out from our nodes
283 61         133 my $species_taxon = $self->tree->find_node(-rank => 'species');
284 61 50       163 unless ($species_taxon) {
285             # just assume we are rank species
286 0         0 $species_taxon = $self->taxon;
287             }
288              
289 61         190 $species = $species_taxon->scientific_name;
290            
291             #
292             # munge it like the Bio::SeqIO modules used to do
293             # (more or less copy/pasted from old Bio::SeqIO::genbank, hence comments
294             # referring to 'ORGANISM' etc.)
295             #
296              
297 61         153 my $root = $self->tree->get_root_node;
298 61 50       150 unless ($root) {
299 0         0 $self->tree(Bio::Tree::Tree->new(-node => $species_taxon));
300 0         0 $root = $self->tree->get_root_node;
301             }
302            
303 61         189 my @spflds = split(' ', $species);
304 61 100 66     231 if (@spflds > 1 && $root->node_name ne 'Viruses') {
305 43         53 $species = undef;
306              
307             # does the next term start with uppercase?
308             # yes: valid genus; no then unconventional
309             # e.g. leaf litter basidiomycete sp. Collb2-39
310 43         46 my $genus;
311 43 50       214 if ($spflds[0] =~ m/^[A-Z]/) {
312 43         67 $genus = shift(@spflds);
313             }
314             else {
315 0         0 undef $genus;
316             }
317            
318 43         49 my $sub_species;
319 43 50       95 if (@spflds) {
320 43         145 while (my $fld = shift @spflds) {
321 45         91 $species .= "$fld ";
322             # does it have subspecies or varieties?
323 45 100       267 last if ($fld =~ m/(sp\.|var\.)/);
324             }
325 43         76 chop $species; # last space
326 43 100       101 $sub_species = join ' ',@spflds if(@spflds);
327             }
328             else {
329 0         0 $species = 'sp.';
330             }
331              
332             # does ORGANISM start with any words which make its genus undefined?
333             # these are in @unkn_genus
334             # this in case species starts with uppercase so isn't caught above.
335             # alter common name if required
336 43         59 my $unconv = 0; # is it unconventional species name?
337 43         104 my @unkn_genus = ('unknown','unclassified','uncultured','unidentified');
338 43         87 foreach (@unkn_genus) {
339 172 50 33     2005 if ($genus && $genus =~ m/$_/i) {
    50          
340 0         0 $species = $genus . " " . $species;
341 0         0 undef $genus;
342 0         0 $unconv = 1;
343 0         0 last;
344             }
345             elsif ($species =~ m/$_/i) {
346 0         0 $unconv = 1;
347 0         0 last;
348             }
349             }
350 43 50 66     443 if (!$unconv && !$sub_species && $species =~ s/^(\w+)\s(\w+)$/$1/) {
      66        
351             # need to extract subspecies from conventional ORGANISM format.
352             # Will the 'word' in a two element species name
353             # e.g. $species = 'thummi thummi' => $species='thummi' &
354             # $sub_species='thummi'
355 0         0 $sub_species = $2;
356             }
357              
358 43 50       202 $self->genus($genus) if $genus;
359 43 100       116 $self->sub_species($sub_species) if $sub_species;
360             }
361              
362 61         131 $self->{_species} = $species;
363             }
364 130         297 return $self->{_species};
365             }
366              
367             =head2 genus
368              
369             Title : genus
370             Usage : $self->genus( $genus );
371             $genus = $self->genus();
372             Function: Get or set the scientific genus name.
373             Example : $self->genus('Homo');
374             Returns : Scientific genus name as string
375             Args : Scientific genus name as string
376              
377             =cut
378              
379             sub genus {
380 190     190 1 234 my ($self, $genus) = @_;
381              
382             # TODO: instead of caching the raw name, cache the actual node instance.
383 190 100       352 if ($genus) {
384 64         148 $self->{_genus} = $genus;
385             }
386 190 100       395 unless (defined $self->{_genus}) {
387 18         39 my $genus_taxon = $self->tree->find_node(-rank => 'genus');
388 18 50       52 unless ($genus_taxon) {
389             # just assume our ancestor is rank genus
390 18         43 $genus_taxon = $self->taxon->ancestor;
391             }
392            
393 18 50       65 $self->{_genus} = $genus_taxon->scientific_name if $genus_taxon;
394             }
395            
396 190         324 return $self->{_genus};
397             }
398              
399             =head2 sub_species
400              
401             Title : sub_species
402             Usage : $obj->sub_species($newval)
403             Function: Get or set the scientific subspecies name.
404             Returns : value of sub_species
405             Args : newvalue (optional)
406              
407             =cut
408              
409             sub sub_species {
410 10     10 1 13 my ($self, $sub) = @_;
411            
412             # TODO: instead of caching the raw name, cache the actual node instance.
413 10 100       25 if (!defined $self->{'_sub_species'}) {
414 6         16 my $ss_taxon = $self->tree->find_node(-rank => 'subspecies');
415 6 50       19 if ($ss_taxon) {
416 0 0       0 if ($sub) {
417 0         0 $ss_taxon->scientific_name($sub);
418            
419             # *** weakening ref to our root node in species() to solve a
420             # memory leak means that we have a subspecies taxon to set
421             # during the first call to species(), but it has vanished by
422             # the time a user subsequently calls sub_species() to get the
423             # value. So we 'cheat' and just store the subspecies name in
424             # our self hash, instead of the tree. Is this a problem for
425             # a Species object? Can't decide --sendu
426            
427             # This can now be changed to deal with this information on the
428             # fly. For now, the caching remains, but maybe we should just
429             # let these things deal with mutable data as needed? -- cjfields
430            
431 0         0 $self->{'_sub_species'} = $sub;
432             }
433 0         0 return $ss_taxon->scientific_name;
434             }
435             else {
436             # should we create a node here to be added to the tree?
437             }
438             }
439            
440             # fall back to direct storage on self
441 10 100       23 $self->{'_sub_species'} = $sub if $sub;
442 10         21 return $self->{'_sub_species'};
443             }
444              
445             =head2 variant
446              
447             Title : variant
448             Usage : $obj->variant($newval)
449             Function: Get/set variant information for this species object (strain,
450             isolate, etc).
451             Example :
452             Returns : value of variant (a scalar)
453             Args : new value (a scalar or undef, optional)
454              
455             =cut
456              
457             sub variant{
458 3     3 1 7 my ($self, $var) = @_;
459            
460             # TODO: instead of caching the raw name, cache the actual node instance.
461 3 50       12 if (!defined $self->{'_variant'}) {
462 3         10 my $var_taxon = $self->tree->find_node(-rank => 'variant');
463 3 50       12 if ($var_taxon) {
464 0 0       0 if ($var) {
465 0         0 $var_taxon->scientific_name($var);
466             }
467 0         0 return $var_taxon->scientific_name;
468             }
469             else {
470             # should we create a node here to be added to the tree?
471             }
472             }
473            
474             # fall back to direct storage on self
475 3 50       11 $self->{'_variant'} = $var if $var;
476 3         16 return $self->{'_variant'};
477             }
478              
479             =head2 binomial
480              
481             Title : binomial
482             Usage : $binomial = $self->binomial();
483             $binomial = $self->binomial('FULL');
484             Function: Returns a string "Genus species", or "Genus species subspecies",
485             if the first argument is 'FULL' (and the species has a subspecies).
486             Args : Optionally the string 'FULL' to get the full name including
487             the subspecies.
488             Note : This is just munged from the taxon() name
489              
490             =cut
491              
492             sub binomial {
493 108     108 1 153 my ($self, $full) = @_;
494 108   50     231 my $rank = $self->taxon->rank || 'no rank';
495            
496 108         275 my ($species, $genus) = ($self->species, $self->genus);
497 108 50       208 unless (defined $species) {
498 0         0 $species = 'sp.';
499 0         0 $self->warn("requested binomial but classification was not set");
500             }
501 108 50       187 $genus = '' unless( defined $genus);
502            
503 108         855 $species =~ s/$genus\s+//;
504            
505 108         206 my $bi = "$genus $species";
506 108 100 66     271 if (defined($full) && $full =~ /full/i) {
507 3         8 my $ssp = $self->sub_species;
508 3 50       11 if ($ssp) {
509 3         60 $ssp =~ s/$bi\s+//;
510 3         45 $ssp =~ s/$species\s+//;
511 3         8 $bi .= " $ssp";
512             }
513             }
514 108         401 return $bi;
515             }
516              
517             =head2 validate_species_name
518              
519             Title : validate_species_name
520             Usage : $result = $self->validate_species_name($string);
521             Function: Validate the species portion of the binomial
522             Args : string
523             Notes : The string following the "genus name" in the NCBI binomial is so
524             variable that it's not clear that this is a useful function. Consider
525             the binomials "Simian 11 rotavirus (serotype 3 / strain
526             SA11-Patton)", or "St. Thomas 3 rotavirus", straight from GenBank.
527             This is particularly problematic in microbes and viruses. As such,
528             this isn't actually used automatically by any Bio::Species method.
529              
530             =cut
531              
532             sub validate_species_name {
533 0     0 1 0 my( $self, $string ) = @_;
534              
535 0 0       0 return 1 if $string eq "sp.";
536 0 0       0 return 1 if $string =~ /strain/;
537 0 0       0 return 1 if $string =~ /^[a-z][\w\s-]+$/i;
538 0         0 $self->throw("Invalid species name '$string'");
539             }
540              
541             sub validate_name {
542 0     0 0 0 return 1;
543             }
544              
545             =head2 organelle
546              
547             Title : organelle
548             Usage : $self->organelle( $organelle );
549             $organelle = $self->organelle();
550             Function: Get or set the organelle name
551             Example : $self->organelle('Chloroplast')
552             Returns : The organelle name in a string
553             Args : String, which is the organelle name
554             Note : TODO: We currently do not know where the organelle definition will
555             eventually go. This is stored in the source seqfeature, though,
556             so the information isn't lost.
557              
558             =cut
559              
560             sub organelle {
561 42     42 1 80 my($self) = shift;
562 42 100       138 return $self->{'_organelle'} = shift if @_;
563 31         114 return $self->{'_organelle'};
564             }
565              
566             =head2 Delegation
567              
568             The following methods delegate to the internal Bio::Taxon instance. This is
569             mainly to allow code continue using older methods, with the mind to migrate to
570             using Bio::Taxon and related methods when this class is deprecated.
571              
572             =cut
573              
574 0     0 0 0 sub node_name {shift->taxon->node_name(@_)}
575 268     268 0 538 sub scientific_name {shift->taxon->node_name(@_)}
576              
577 0     0 1 0 sub id {shift->taxon->id(@_)}
578 0     0 0 0 sub object_id {shift->taxon->id(@_)}
579 361     361 1 1679 sub ncbi_taxid {shift->taxon->ncbi_taxid(@_)}
580 1     1 0 4 sub rank {shift->taxon->rank(@_)}
581 0     0 1 0 sub division {shift->taxon->division(@_)}
582              
583 0     0 0 0 sub common_names {shift->taxon->common_names(@_)}
584 128     128 1 381 sub common_name {shift->taxon->common_names(@_)}
585              
586 0     0 0 0 sub genetic_code {shift->taxon->genetic_code(@_)}
587 0     0 0 0 sub mitochondrial_genetic_code {shift->taxon->mitochondrial_genetic_code(@_)}
588              
589 0     0 0 0 sub create_date { shift->taxon->create_date(@_)}
590 0     0 0 0 sub pub_date { shift->taxon->pub_date(@_)}
591 0     0 0 0 sub update_date { shift->taxon->update_date(@_)}
592              
593 0     0 0 0 sub db_handle { shift->taxon->db_handle(@_)}
594              
595 0     0 0 0 sub parent_id { shift->taxon->parent_id(@_)}
596 0     0 0 0 sub parent_taxon_id { shift->taxon->parent_id(@_)}
597              
598 0     0 0 0 sub version { shift->taxon->version(@_)}
599 0     0 0 0 sub authority { shift->taxon->authority(@_)}
600 0     0 0 0 sub namespace { shift->taxon->namespace(@_)}
601              
602 0     0 1 0 sub ancestor { shift->taxon->ancestor(@_)}
603 0     0 0 0 sub get_Parent_Node { shift->taxon->get_Parent_Node(@_)}
604 0     0 1 0 sub each_Descendent { shift->taxon->each_Descendent(@_)}
605 0     0 0 0 sub get_Children_Nodes { shift->taxon->get_Children_Nodes(@_)}
606 0     0 0 0 sub remove_Descendant { shift->taxon->remove_Descendant(@_)}
607              
608 213     213 0 538 sub name { shift->taxon->name(@_)}
609              
610             =head2 taxon
611              
612             Title : taxon
613             Usage : $obj->taxon
614             Function : retrieve the internal Bio::Taxon instance
615             Returns : A Bio::Taxon. If one is not previously set,
616             an instance is created lazily
617             Args : Bio::Taxon (optional)
618            
619             =cut
620              
621             sub taxon {
622 1976     1976 1 2075 my ($self, $taxon) = @_;
623 1976 100 66     7511 if (!$self->{taxon} || $taxon) {
624 249   33     546 $taxon ||= Bio::Taxon->new();
625 249         487 $self->{taxon} = $taxon;
626             }
627 1976         5005 $self->{taxon};
628             }
629              
630             =head2 tree
631              
632             Title : tree
633             Usage : $obj->tree
634             Function : Returns a Bio::Tree::Tree object
635             Returns : A Bio::Tree::Tree. If one is not previously set,
636             an instance is created lazily
637             Args : Bio::Tree::Tree (optional)
638            
639             =cut
640              
641             sub tree {
642 1224     1224 1 1315 my ($self, $tree) = @_;
643 1224 100 100     4994 if (!$self->{tree} || $tree) {
644 485   33     940 $tree ||= Bio::Tree::Tree->new();
645 485         826 delete $tree->{_root_cleanup_methods};
646 485         712 $self->{tree} = $tree;
647             }
648 1224         3715 $self->{tree};
649             }
650              
651             sub DESTROY {
652 287     287   14767 my $self = shift;
653 287         805 $self->tree->cleanup_tree;
654 287         994 delete $self->{tree};
655 287         713 $self->taxon->node_cleanup;
656             }
657              
658             1;