File Coverage

blib/lib/Bio/Phylo/Parsers/Nhx.pm
Criterion Covered Total %
statement 38 40 95.0
branch 8 12 66.6
condition 1 3 33.3
subroutine 8 8 100.0
pod n/a
total 55 63 87.3


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Nhx;
2 1     1   6 use warnings;
  1         2  
  1         27  
3 1     1   4 use strict;
  1         2  
  1         17  
4 1     1   4 use Bio::Phylo::IO 'parse';
  1         2  
  1         37  
5 1     1   5 use base 'Bio::Phylo::Parsers::Newick';
  1         2  
  1         276  
6 1     1   6 use Bio::Phylo::Util::CONSTANT ':namespaces';
  1         2  
  1         443  
7              
8             =head1 NAME
9              
10             Bio::Phylo::Parsers::Nhx - Parser used by Bio::Phylo::IO, no serviceable parts inside
11              
12             =head1 DESCRIPTION
13              
14             This module parses "New Hampshire eXtended" (NHX) tree descriptions in parenthetical
15             format. The node annotations, which are described here:
16             https://sites.google.com/site/cmzmasek/home/software/forester/nhx, are stored as meta
17             annotations in the namespace whose reserved prefix, nhx, is associated with the above
18             URI. This means that after this parser is done, you can fetch an annotation value thusly:
19              
20             my $gene_name = $node->get_meta_object( 'nhx:GN' );
21              
22             This parser is called by the L<Bio::Phylo::IO> facade; don't call it directly. In turn,
23             this parser delegates processing of Newick strings to L<Bio::Phylo::Parsers::Newick>.
24             As such, several additional flags can be passed to the Bio::Phylo::IO parse and parse_tree
25             functions to influence how to deal with complex newick strings:
26              
27             -keep => [ ...list of taxa names... ]
28              
29             The C<-keep> flag allows you to only retain certain taxa of interest, ignoring others
30             while building the tree object.
31              
32             -keep_whitespace => 1,
33              
34             This will treat unescaped whitespace as if it is a normal taxon name character. Normally,
35             whitespace is only retained inside quoted strings (e.g. C<'Homo sapiens'>), otherwise it
36             is the convention to use underscores (C<Homo_sapiens>). This is because some programs
37             introduce whitespace to prettify a newick string, e.g. to indicate indentation/depth,
38             in which case you almost certainly want to ignore it. This is the default behaviour. The
39             option to keep it is provided for dealing with incorrectly formatted data.
40              
41             Note that the flag C<-ignore_comments>, which is optional for the Newick parser, cannot be
42             used. This is because NHX embeds its metadata in what are normally comments (i.e. square
43             brackets), so these must be processed in a special way.
44              
45             =cut
46              
47 1     1   51 sub _return_is_scalar { 1 }
48              
49              
50             sub _parse {
51 1     1   2 my $self = shift;
52 1         7 $self->_args->{'-ignore_comments'} = 1;
53 1         6 return $self->SUPER::_parse;
54             }
55              
56             sub _parse_node_data {
57 215     215   1773 my ( $self, $node, @clade ) = @_;
58 215         461 $self->_logger->debug("parsing name and branch length for node");
59 215         289 my @tail;
60 215         448 PARSE_TAIL: for ( my $i = $#clade ; $i >= 0 ; $i-- ) {
61 752 100       1522 if ( $clade[$i] eq ')' ) {
    100          
62 107         273 @tail = @clade[ ( $i + 1 ) .. $#clade ];
63 107         190 last PARSE_TAIL;
64             }
65             elsif ( $i == 0 ) {
66 108         253 @tail = @clade;
67             }
68             }
69            
70             # process branch length, nhx is suffixed
71 215         281 my $bl = $tail[-1];
72 215         260 my $nhx;
73 215 50 33     1313 if ( $bl and $bl =~ /^(.*?)\[&&NHX:(.+?)\]$/ ) {
74 215         695 $node->set_namespaces( 'nhx' => _NS_NHX_ );
75 215         672 ( $bl, $nhx ) = ( $1, $2 );
76 215         582 for my $tuple ( split /:/, $nhx ) {
77 645         1586 my ( $k, $v ) = split /=/, $tuple;
78 645         1635 $node->set_meta_object( 'nhx:' . $k => $v );
79             }
80             }
81              
82             # name only
83 215 50       646 if ( scalar @tail == 1 ) {
    50          
    50          
84 0         0 $node->set_name( $tail[0] );
85             }
86             elsif ( scalar @tail == 2 ) {
87 0         0 $node->set_branch_length( $bl );
88             }
89             elsif ( scalar @tail == 3 ) {
90 215         544 $node->set_name( $tail[0] );
91 215         602 $node->set_branch_length( $bl );
92             }
93             }
94              
95             # podinherit_insert_token
96              
97             =head1 SEE ALSO
98              
99             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
100             for any user or developer questions and discussions.
101              
102             =over
103              
104             =item L<Bio::Phylo::IO>
105              
106             The NHX parser is called by the L<Bio::Phylo::IO> object.
107             Look there to learn how to parse newick strings.
108              
109             =item L<Bio::Phylo::Manual>
110              
111             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
112              
113             =back
114              
115             =head1 CITATION
116              
117             If you use Bio::Phylo in published research, please cite it:
118              
119             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
120             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
121             I<BMC Bioinformatics> B<12>:63.
122             L<http://dx.doi.org/10.1186/1471-2105-12-63>
123              
124             =cut
125              
126             1;