File Coverage

blib/lib/Bio/Phylo/Parsers/Figtree.pm
Criterion Covered Total %
statement 83 86 96.5
branch 16 20 80.0
condition 8 9 88.8
subroutine 10 10 100.0
pod n/a
total 117 125 93.6


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Figtree;
2 2     2   12 use strict;
  2         3  
  2         54  
3 2     2   9 use base 'Bio::Phylo::Parsers::Abstract';
  2         3  
  2         536  
4 2     2   13 use Bio::Phylo::Util::CONSTANT qw':namespaces :objecttypes';
  2         3  
  2         468  
5 2     2   11 use Bio::Phylo::Factory;
  2         3  
  2         9  
6 2     2   8 use Bio::Phylo::IO 'parse_tree';
  2         5  
  2         78  
7 2     2   10 use Bio::Phylo::Util::Logger ':levels';
  2         3  
  2         2245  
8              
9             my $fac = Bio::Phylo::Factory->new;
10             my $log = Bio::Phylo::Util::Logger->new;
11             my $ns = _NS_FIGTREE_;
12             my $pre = 'fig';
13              
14             =head1 NAME
15              
16             Bio::Phylo::Parsers::Figtree - Parser used by Bio::Phylo::IO, no serviceable parts inside
17              
18             =head1 DESCRIPTION
19              
20             This module parses annotated trees in NEXUS format as interpreted by FigTree
21             (L), i.e. trees where nodes have
22             additional 'hot comments' attached to them in the tree description. The
23             implementation assumes syntax as follows:
24              
25             [&minmax={0.1231,0.3254},rate=0.0075583392800736]
26            
27             I.e. the first token inside the comments is an ampersand, the annotations are
28             comma-separated key/value pairs, where ranges are between curly parentheses.
29              
30             The annotations are stored as meta objects, e.g.:
31              
32             $node->get_meta_object('fig:rate'); # 0.0075583392800736
33             $node->get_meta_object('fig:minmax_min'); # 0.1231
34             $node->get_meta_object('fig:minmax_max'); # 0.3254
35              
36             Annotations that have non-alphanumerical symbols in them will have these removed
37             from them. For example, C becomes two annotations:
38             C and C.
39              
40             =cut
41              
42             sub _parse {
43 2     2   5 my $self = shift;
44 2         10 my $fh = $self->_handle;
45 2         16 my $forest = $fac->create_forest;
46 2         44 $forest->set_namespaces( $pre => $ns );
47 2         8 my $tree_block;
48             my $tree_string;
49 2         0 my %translate;
50 2         14 while(<$fh>) {
51 53 100       545 $tree_block++ if /BEGIN TREES;/i;
52 53 100       183 if ( /^\s*TREE (\S+) = \[&([RU])\] (.+)$/i ) {
53 2         22 my ( $name, $rooted, $newick ) = ( $1, $2, $3 );
54 2         4 $tree_string++;
55 2         11 my $tree = parse_tree(
56             '-format' => 'newick',
57             '-string' => $newick,
58             '-ignore_comments' => 1,
59             );
60 2 100       14 $tree->set_as_unrooted if $rooted eq 'U';
61 2         18 $tree->set_name( $name );
62 2         13 $self->_post_process( $tree );
63 2         32 for my $tip ( @{ $tree->get_terminals } ) {
  2         60  
64 122         224 my $name = $tip->get_name;
65 122         287 $tip->set_name( $translate{$name} );
66             }
67 2         24 $forest->insert($tree);
68             }
69 53 100 100     280 if ( $tree_block and not $tree_string and /\s+(\d+)\s+(.+)/ ) {
      100        
70 18         58 my ( $id, $name ) = ( $1, $2 );
71 18         74 $name =~ s/[,;]$//;
72 18         99 $translate{$id} = $name;
73             }
74             }
75 2         31 return $forest;
76             }
77              
78             sub _post_process {
79 2     2   6 my ( $self, $tree ) = @_;
80 2         10 $log->debug("going to post-process tree");
81             $tree->visit(sub{
82 156     156   236 my $n = shift;
83 156         407 my $name = $n->get_name;
84 156         325 $name =~ s/\\//g;
85 156         509 $log->debug("name: $name");
86 156 100 66     778 if ( $name =~ /\[/ and $name =~ /^([^\[]*?)\[(.+?)\]$/ ) {
87 52         234 my ( $trimmed, $comments ) = ( $1, $2 );
88 52         301 $n->set_name( $trimmed );
89 52         256 $log->debug("trimmed name: $trimmed");
90            
91             # "hot comments" start with ampersand. ignore if not.
92 52 50       255 if ( $comments =~ /^&(.+)/ ) {
93 52         224 $log->debug("hot comments: $comments");
94 52         161 $comments = $1;
95            
96             # string needs to be fully eaten up
97 52         133 COMMENT: while( my $old_length = length($comments) ) {
98            
99             # grab the next key
100 463 50       1742 if ( $comments =~ /^(.+?)=/ ) {
101 463         1381 my $key = $1;
102            
103             # remove the key and the =
104 463         5156 $comments =~ s/^\Q$key\E=//;
105 463         1246 $key =~ s/\%//;
106            
107             # value is a comma separated range
108 463 100       2425 if ( $comments =~ /^{([^}]+)}/ ) {
    50          
109 206         478 my $value = $1;
110 206         705 my ( $min, $max ) = split /,/, $value;
111 206         1158 _meta( $n, "${key}_min" => $min );
112 206         910 _meta( $n, "${key}_max" => $max );
113 206         1170 $log->debug("$key: $min .. $max");
114            
115             # remove the range
116 206         444 $value = "{$value}";
117 206         3604 $comments =~ s/^\Q$value\E//;
118             }
119            
120             # value is a scalar
121             elsif ( $comments =~ /^([^,]+)/ ) {
122 257         657 my $value = $1;
123 257         659 _meta( $n, $key => $value );
124 257         4304 $comments =~ s/^\Q$value\E//;
125 257         1255 $log->debug("$key: $value");
126             }
127            
128             # remove trailing comma, if any
129 463         1853 $comments =~ s/^,//;
130             }
131 463 50       1747 if ( $old_length == length($comments) ) {
132 0         0 $log->warn("couldn't parse newick comment: $comments");
133 0         0 last COMMENT;
134             }
135             }
136             }
137             else {
138 0         0 $log->debug("not hot: $comments");
139             }
140             }
141 2         29 });
142             }
143              
144             sub _meta {
145 669     669   1521 my ( $node, $key, $value ) = @_;
146             #if ( $key =~ /[()+]/ ) {
147 669         2490 $log->info("cleaning up CURIE candidate $key");
148 669         1357 $key =~ s/\(/_/g;
149 669         1034 $key =~ s/\)/_/g;
150 669         1008 $key =~ s/\+/_/g;
151 669         910 $key =~ s/\!//;
152             #}
153 669         4398 $node->add_meta(
154             $fac->create_meta( '-triple' => { "${pre}:${key}" => $value } )
155             );
156             }
157              
158              
159             # podinherit_insert_token
160              
161             =head1 SEE ALSO
162              
163             There is a mailing list at L
164             for any user or developer questions and discussions.
165              
166             =over
167              
168             =item L
169              
170             The figtree parser is called by the L object.
171             Look there to learn how to parse phylogenetic data files in general.
172              
173             =item L
174              
175             Also see the manual: L and L
176              
177             =back
178              
179             =head1 CITATION
180              
181             If you use Bio::Phylo in published research, please cite it:
182              
183             B, B, B, B
184             and B, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
185             I B<12>:63.
186             L
187              
188             =cut
189              
190             1;