File Coverage

blib/lib/Bio/Phylo/Parsers/Nhx.pm
Criterion Covered Total %
statement 38 40 95.0
branch 8 12 66.6
condition 1 3 33.3
subroutine 8 8 100.0
pod n/a
total 55 63 87.3


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Nhx;
2 1     1   6 use warnings;
  1         1  
  1         28  
3 1     1   4 use strict;
  1         2  
  1         17  
4 1     1   3 use Bio::Phylo::IO 'parse';
  1         2  
  1         37  
5 1     1   4 use base 'Bio::Phylo::Parsers::Newick';
  1         2  
  1         278  
6 1     1   6 use Bio::Phylo::Util::CONSTANT ':namespaces';
  1         2  
  1         389  
7              
8             =head1 NAME
9              
10             Bio::Phylo::Parsers::Nhx - Parser used by Bio::Phylo::IO, no serviceable parts inside
11              
12             =head1 DESCRIPTION
13              
14             This module parses "New Hampshire eXtended" (NHX) tree descriptions in parenthetical
15             format. The node annotations, which are described here:
16             https://sites.google.com/site/cmzmasek/home/software/forester/nhx, are stored as meta
17             annotations in the namespace whose reserved prefix, nhx, is associated with the above
18             URI. This means that after this parser is done, you can fetch an annotation value thusly:
19              
20             my $gene_name = $node->get_meta_object( 'nhx:GN' );
21              
22             This parser is called by the L<Bio::Phylo::IO> facade; don't call it directly. In turn,
23             this parser delegates processing of Newick strings to L<Bio::Phylo::Parsers::Newick>.
24             As such, several additional flags can be passed to the Bio::Phylo::IO parse and parse_tree
25             functions to influence how to deal with complex Newick strings:
26              
27             -keep => [ ...list of taxa names... ]
28              
29             The C<-keep> flag allows you to only retain certain taxa of interest, ignoring others
30             while building the tree object.
31              
32             -keep_whitespace => 1,
33              
34             This will treat unescaped whitespace as if it is a normal taxon name character. Normally,
35             whitespace is only retained inside quoted strings (e.g. C<'Homo sapiens'>), otherwise it
36             is the convention to use underscores (C<Homo_sapiens>). This is because some programs
37             introduce whitespace to prettify a Newick string, e.g. to indicate indentation/depth,
38             in which case you almost certainly want to ignore it. This is the default behaviour. The
39             option to keep it is provided for dealing with incorrectly formatted data.
40              
41             Note that the flag C<-ignore_comments>, which is optional for the Newick parser, cannot be
42             used. This is because NHX embeds its metadata in what are normally comments (i.e. square
43             brackets), so these must be processed in a special way.
44              
45             =cut
46              
47 1     1   3 sub _return_is_scalar { 1 }
48              
49              
50             sub _parse {
51 1     1   2 my $self = shift;
52 1         10 $self->_args->{'-ignore_comments'} = 1;
53 1         7 return $self->SUPER::_parse;
54             }
55              
56             sub _parse_node_data {
57 215     215   1776 my ( $self, $node, @clade ) = @_;
58 215         477 $self->_logger->debug("parsing name and branch length for node");
59 215         308 my @tail;
60 215         488 PARSE_TAIL: for ( my $i = $#clade ; $i >= 0 ; $i-- ) {
61 752 100       1558 if ( $clade[$i] eq ')' ) {
    100          
62 107         301 @tail = @clade[ ( $i + 1 ) .. $#clade ];
63 107         195 last PARSE_TAIL;
64             }
65             elsif ( $i == 0 ) {
66 108         273 @tail = @clade;
67             }
68             }
69            
70             # process branch length, nhx is suffixed
71 215         305 my $bl = $tail[-1];
72 215         239 my $nhx;
73 215 50 33     1333 if ( $bl and $bl =~ /^(.*?)\[&&NHX:(.+?)\]$/ ) {
74 215         673 $node->set_namespaces( 'nhx' => _NS_NHX_ );
75 215         718 ( $bl, $nhx ) = ( $1, $2 );
76 215         603 for my $tuple ( split /:/, $nhx ) {
77 645         1707 my ( $k, $v ) = split /=/, $tuple;
78 645         1734 $node->set_meta_object( 'nhx:' . $k => $v );
79             }
80             }
81              
82             # name only
83 215 50       667 if ( scalar @tail == 1 ) {
    50          
    50          
84 0         0 $node->set_name( $tail[0] );
85             }
86             elsif ( scalar @tail == 2 ) {
87 0         0 $node->set_branch_length( $bl );
88             }
89             elsif ( scalar @tail == 3 ) {
90 215         520 $node->set_name( $tail[0] );
91 215         572 $node->set_branch_length( $bl );
92             }
93             }
94              
95             # podinherit_insert_token
96              
97             =head1 SEE ALSO
98              
99             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
100             for any user or developer questions and discussions.
101              
102             =over
103              
104             =item L<Bio::Phylo::IO>
105              
106             The NHX parser is called by the L<Bio::Phylo::IO> object.
107             Look there to learn how to parse Newick strings.
108              
109             =item L<Bio::Phylo::Manual>
110              
111             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
112              
113             =back
114              
115             =head1 CITATION
116              
117             If you use Bio::Phylo in published research, please cite it:
118              
119             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
120             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
121             I<BMC Bioinformatics> B<12>:63.
122             L<http://dx.doi.org/10.1186/1471-2105-12-63>
123              
124             =cut
125              
126             1;