| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::Phylo::Parsers::Figtree; |
|
2
|
2
|
|
|
2
|
|
11
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
49
|
|
|
3
|
2
|
|
|
2
|
|
7
|
use base 'Bio::Phylo::Parsers::Abstract'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
578
|
|
|
4
|
2
|
|
|
2
|
|
12
|
use Bio::Phylo::Util::CONSTANT qw':namespaces :objecttypes'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
502
|
|
|
5
|
2
|
|
|
2
|
|
12
|
use Bio::Phylo::Factory; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
7
|
|
|
6
|
2
|
|
|
2
|
|
8
|
use Bio::Phylo::IO 'parse_tree'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
79
|
|
|
7
|
2
|
|
|
2
|
|
11
|
use Bio::Phylo::Util::Logger ':levels'; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
1742
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# File-scoped state shared by the subs in this package.
#
# $pre / $ns : CURIE prefix and namespace constant that _parse registers on
#              the forest; $pre is also prepended to every annotation key
#              in _meta.
# $fac       : factory used to create the forest and the meta objects.
# $log       : logger used for debug/info/warn messages.
my $pre = 'fig';
my $ns  = _NS_FIGTREE_;
my $fac = Bio::Phylo::Factory->new;
my $log = Bio::Phylo::Util::Logger->new;
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Phylo::Parsers::Figtree - Parser used by Bio::Phylo::IO, no serviceable parts inside |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
This module parses annotated trees in NEXUS format as interpreted by FigTree |
|
21
|
|
|
|
|
|
|
(L<http://tree.bio.ed.ac.uk/software/figtree/>), i.e. trees where nodes have |
|
22
|
|
|
|
|
|
|
additional 'hot comments' attached to them in the tree description. The |
|
23
|
|
|
|
|
|
|
implementation assumes syntax as follows: |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
[&minmax={0.1231,0.3254},rate=0.0075583392800736] |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
I.e. the first token inside the comments is an ampersand, the annotations are |
|
28
|
|
|
|
|
|
|
comma-separated key/value pairs, where ranges are enclosed in curly braces. |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
The annotations are stored as meta objects, e.g.: |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$node->get_meta_object('fig:rate'); # 0.0075583392800736 |
|
33
|
|
|
|
|
|
|
$node->get_meta_object('fig:minmax_min'); # 0.1231 |
|
34
|
|
|
|
|
|
|
$node->get_meta_object('fig:minmax_max'); # 0.3254 |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Annotations that have non-alphanumerical symbols in them will have these removed |
|
37
|
|
|
|
|
|
|
from them. For example, C<rate_95%_HPD={}> becomes two annotations: |
|
38
|
|
|
|
|
|
|
C<rate_95_HPD_min> and C<rate_95_HPD_max>. |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=cut |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Reads the parser's input handle line by line and returns a forest that
# holds one tree for every "TREE name = [&R|&U] newick" statement found.
#
# While inside the trees block and before the first TREE statement, lines of
# the form "<number> <label>[,;]" are collected into a translation table.
# After a tree has been parsed and its hot comments have been processed by
# _post_process, tips whose name is a key in that table are renamed to the
# corresponding label.
#
# Returns: the forest created through the factory, with the figtree
#          namespace registered on it.
sub _parse {
    my $self   = shift;
    my $fh     = $self->_handle;
    my $forest = $fac->create_forest;
    $forest->set_namespaces( $pre => $ns );

    my $in_tree_block;    # set once "BEGIN TREES;" has been seen
    my $seen_tree;        # set once the first TREE statement has been seen
    my %translate;        # numeric id => label, from the translation table

    # Read into a lexical instead of the global $_, so that code called from
    # inside the loop cannot interfere with the line being examined.
    while ( my $line = <$fh> ) {

        # Drop the line ending and trailing blanks (also copes with CRLF),
        # otherwise a trailing "\r" would end up in labels and would keep
        # the [,;] terminator from being stripped below.
        $line =~ s/\s+$//;

        $in_tree_block++ if $line =~ /BEGIN TREES;/i;

        if ( $line =~ /^\s*TREE (\S+) = \[&([RU])\] (.+)$/i ) {
            my ( $name, $rooted, $newick ) = ( $1, $2, $3 );
            $seen_tree++;
            my $tree = parse_tree(
                '-format'          => 'newick',
                '-string'          => $newick,
                '-ignore_comments' => 1,
            );

            # the pattern above is case-insensitive, so [&u] must count too
            $tree->set_as_unrooted if uc($rooted) eq 'U';
            $tree->set_name( $name );
            $self->_post_process( $tree );

            # Apply the translation table. Tips without an entry (for
            # example when the file has no table at all) keep their own
            # name instead of having it replaced by undef.
            for my $tip ( @{ $tree->get_terminals } ) {
                my $label = $tip->get_name;
                next unless defined $label and exists $translate{$label};
                $tip->set_name( $translate{$label} );
            }
            $forest->insert($tree);
        }
        elsif ( $in_tree_block and not $seen_tree and $line =~ /\s+(\d+)\s+(.+)/ ) {
            my ( $id, $label ) = ( $1, $2 );
            $label =~ s/[,;]$//;    # entry separator / statement terminator
            $translate{$id} = $label;
        }
    }
    return $forest;
}
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Visits every node of $tree and converts a bracketed comment at the end of
# the node name into meta annotations.
#
# For a name of the form "label[&key=value,key={a,b},...]":
#   * the node is renamed to "label";
#   * each "key=value" becomes one annotation;
#   * each "key={a,b}" becomes two annotations, "key_min" and "key_max"
#     (only the first two comma-separated elements of the range are used);
#   * '%' characters are removed from keys.
# Comments that do not start with '&' are only logged. If the remaining
# comment text cannot be consumed any further, a warning is logged and the
# rest of that comment is skipped.
#
# Arguments: $self (unused here), $tree (object providing ->visit).
sub _post_process {
    my ( $self, $tree ) = @_;
    $log->debug("going to post-process tree");
    $tree->visit(sub{
        my $n    = shift;
        my $name = $n->get_name;
        $name = '' unless defined $name;    # treat a missing name as empty
        $name =~ s/\\//g;
        $log->debug("name: $name");

        # only names that end in a [bracketed] section carry comments
        my ( $trimmed, $comments ) = $name =~ /^([^\[]*?)\[(.+?)\]$/;
        return unless defined $comments;
        $n->set_name( $trimmed );
        $log->debug("trimmed name: $trimmed");

        # "hot comments" start with ampersand. ignore if not.
        my ($annotations) = $comments =~ /^&(.+)/;
        unless ( defined $annotations ) {
            $log->debug("not hot: $comments");
            return;
        }
        $log->debug("hot comments: $comments");

        # string needs to be fully eaten up
        COMMENT: while ( my $old_length = length($annotations) ) {

            # grab the next key and remove it, together with the =
            if ( $annotations =~ s/^(.+?)=// ) {
                my $key = $1;
                $key =~ s/\%//g;    # strip every '%', not just the first

                # value is a comma separated range; consume it in one go
                if ( $annotations =~ s/^\{([^}]+)\}// ) {
                    my $range = $1;
                    my ( $min, $max ) = split /,/, $range;
                    _meta( $n, "${key}_min" => $min );
                    _meta( $n, "${key}_max" => $max );
                    $log->debug("$key: $min .. $max");
                }

                # value is a scalar: everything up to the next comma
                elsif ( $annotations =~ s/^([^,]+)// ) {
                    my $value = $1;
                    _meta( $n, $key => $value );
                    $log->debug("$key: $value");
                }

                # remove trailing comma, if any
                $annotations =~ s/^,//;
            }

            # nothing was consumed in this pass: bail out instead of looping
            if ( $old_length == length($annotations) ) {
                $log->warn("couldn't parse newick comment: $annotations");
                last COMMENT;
            }
        }
        return;
    });
}
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Attaches one annotation to $node as a meta object.
#
# Arguments:
#   $node  - object providing ->add_meta
#   $key   - annotation name; it is cleaned up before use: '(', ')' and '+'
#            are each replaced by '_', and every '!' is removed
#   $value - annotation value, stored unchanged
#
# The predicate of the created triple is the file-level prefix, a colon and
# the cleaned-up key.
sub _meta {
    my ( $node, $key, $value ) = @_;

    my $clean = $key;
    $clean =~ tr/()+/_/;    # short replacement list: all three map to '_'
    $clean =~ tr/!//d;      # delete every '!', not only the first one

    # only report a clean-up when the key actually had to be changed
    $log->info("cleaning up CURIE candidate $key") if $clean ne $key;

    $node->add_meta(
        $fac->create_meta( '-triple' => { "${pre}:${clean}" => $value } )
    );
}
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# podinherit_insert_token |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
|
164
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=over |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=item L<Bio::Phylo::IO> |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
The figtree parser is called by the L<Bio::Phylo::IO> object. |
|
171
|
|
|
|
|
|
|
Look there to learn how to parse phylogenetic data files in general. |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com> |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=back |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 CITATION |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
|
184
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
|
185
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
|
186
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
1; |