File Coverage

blib/lib/Bio/Phylo/Parsers/Figtree.pm
Criterion Covered Total %
statement 83 86 96.5
branch 16 20 80.0
condition 8 9 88.8
subroutine 10 10 100.0
pod n/a
total 117 125 93.6


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Figtree;
2 2     2   11 use strict;
  2         5  
  2         49  
3 2     2   7 use base 'Bio::Phylo::Parsers::Abstract';
  2         4  
  2         578  
4 2     2   12 use Bio::Phylo::Util::CONSTANT qw':namespaces :objecttypes';
  2         4  
  2         502  
5 2     2   12 use Bio::Phylo::Factory;
  2         4  
  2         7  
6 2     2   8 use Bio::Phylo::IO 'parse_tree';
  2         4  
  2         79  
7 2     2   11 use Bio::Phylo::Util::Logger ':levels';
  2         3  
  2         1742  
8              
9             my $fac = Bio::Phylo::Factory->new;
10             my $log = Bio::Phylo::Util::Logger->new;
11             my $ns = _NS_FIGTREE_;
12             my $pre = 'fig';
13              
14             =head1 NAME
15              
16             Bio::Phylo::Parsers::Figtree - Parser used by Bio::Phylo::IO, no serviceable parts inside
17              
18             =head1 DESCRIPTION
19              
20             This module parses annotated trees in NEXUS format as interpreted by FigTree
21             (L<http://tree.bio.ed.ac.uk/software/figtree/>), i.e. trees where nodes have
22             additional 'hot comments' attached to them in the tree description. The
23             implementation assumes syntax as follows:
24              
25             [&minmax={0.1231,0.3254},rate=0.0075583392800736]
26            
27             I.e. the first token inside the comments is an ampersand, the annotations are
28             comma-separated key/value pairs, where ranges are between curly parentheses.
29              
30             The annotations are stored as meta objects, e.g.:
31              
32             $node->get_meta_object('fig:rate'); # 0.0075583392800736
33             $node->get_meta_object('fig:minmax_min'); # 0.1231
34             $node->get_meta_object('fig:minmax_max'); # 0.3254
35              
36             Annotations that have non-alphanumerical symbols in them will have these removed
37             from them. For example, C<rate_95%_HPD={}> becomes two annotations:
38             C<rate_95_HPD_min> and C<rate_95_HPD_max>.
39              
40             =cut
41              
42             sub _parse {
43 2     2   5 my $self = shift;
44 2         12 my $fh = $self->_handle;
45 2         26 my $forest = $fac->create_forest;
46 2         34 $forest->set_namespaces( $pre => $ns );
47 2         7 my $tree_block;
48             my $tree_string;
49 2         0 my %translate;
50 2         13 while(<$fh>) {
51 53 100       532 $tree_block++ if /BEGIN TREES;/i;
52 53 100       171 if ( /^\s*TREE (\S+) = \[&([RU])\] (.+)$/i ) {
53 2         22 my ( $name, $rooted, $newick ) = ( $1, $2, $3 );
54 2         5 $tree_string++;
55 2         11 my $tree = parse_tree(
56             '-format' => 'newick',
57             '-string' => $newick,
58             '-ignore_comments' => 1,
59             );
60 2 100       13 $tree->set_as_unrooted if $rooted eq 'U';
61 2         18 $tree->set_name( $name );
62 2         11 $self->_post_process( $tree );
63 2         27 for my $tip ( @{ $tree->get_terminals } ) {
  2         40  
64 122         202 my $name = $tip->get_name;
65 122         246 $tip->set_name( $translate{$name} );
66             }
67 2         17 $forest->insert($tree);
68             }
69 53 100 100     287 if ( $tree_block and not $tree_string and /\s+(\d+)\s+(.+)/ ) {
      100        
70 18         48 my ( $id, $name ) = ( $1, $2 );
71 18         63 $name =~ s/[,;]$//;
72 18         74 $translate{$id} = $name;
73             }
74             }
75 2         15 return $forest;
76             }
77              
78             sub _post_process {
79 2     2   6 my ( $self, $tree ) = @_;
80 2         11 $log->debug("going to post-process tree");
81             $tree->visit(sub{
82 156     156   191 my $n = shift;
83 156         287 my $name = $n->get_name;
84 156         243 $name =~ s/\\//g;
85 156         414 $log->debug("name: $name");
86 156 100 66     590 if ( $name =~ /\[/ and $name =~ /^([^\[]*?)\[(.+?)\]$/ ) {
87 52         182 my ( $trimmed, $comments ) = ( $1, $2 );
88 52         202 $n->set_name( $trimmed );
89 52         172 $log->debug("trimmed name: $trimmed");
90            
91             # "hot comments" start with ampersand. ignore if not.
92 52 50       190 if ( $comments =~ /^&(.+)/ ) {
93 52         162 $log->debug("hot comments: $comments");
94 52         122 $comments = $1;
95            
96             # string needs to be fully eaten up
97 52         107 COMMENT: while( my $old_length = length($comments) ) {
98            
99             # grab the next key
100 463 50       1518 if ( $comments =~ /^(.+?)=/ ) {
101 463         961 my $key = $1;
102            
103             # remove the key and the =
104 463         4149 $comments =~ s/^\Q$key\E=//;
105 463         1068 $key =~ s/\%//;
106            
107             # value is a comma separated range
108 463 100       1697 if ( $comments =~ /^{([^}]+)}/ ) {
    50          
109 206         435 my $value = $1;
110 206         598 my ( $min, $max ) = split /,/, $value;
111 206         601 _meta( $n, "${key}_min" => $min );
112 206         724 _meta( $n, "${key}_max" => $max );
113 206         839 $log->debug("$key: $min .. $max");
114            
115             # remove the range
116 206         385 $value = "{$value}";
117 206         3005 $comments =~ s/^\Q$value\E//;
118             }
119            
120             # value is a scalar
121             elsif ( $comments =~ /^([^,]+)/ ) {
122 257         494 my $value = $1;
123 257         563 _meta( $n, $key => $value );
124 257         3516 $comments =~ s/^\Q$value\E//;
125 257         948 $log->debug("$key: $value");
126             }
127            
128             # remove trailing comma, if any
129 463         1470 $comments =~ s/^,//;
130             }
131 463 50       1520 if ( $old_length == length($comments) ) {
132 0         0 $log->warn("couldn't parse newick comment: $comments");
133 0         0 last COMMENT;
134             }
135             }
136             }
137             else {
138 0         0 $log->debug("not hot: $comments");
139             }
140             }
141 2         27 });
142             }
143              
144             sub _meta {
145 669     669   1252 my ( $node, $key, $value ) = @_;
146             #if ( $key =~ /[()+]/ ) {
147 669         1786 $log->info("cleaning up CURIE candidate $key");
148 669         1072 $key =~ s/\(/_/g;
149 669         862 $key =~ s/\)/_/g;
150 669         808 $key =~ s/\+/_/g;
151 669         809 $key =~ s/\!//;
152             #}
153 669         3243 $node->add_meta(
154             $fac->create_meta( '-triple' => { "${pre}:${key}" => $value } )
155             );
156             }
157              
158              
159             # podinherit_insert_token
160              
161             =head1 SEE ALSO
162              
163             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
164             for any user or developer questions and discussions.
165              
166             =over
167              
168             =item L<Bio::Phylo::IO>
169              
170             The figtree parser is called by the L<Bio::Phylo::IO> object.
171             Look there to learn how to parse phylogenetic data files in general.
172              
173             =item L<Bio::Phylo::Manual>
174              
175             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>
176              
177             =back
178              
179             =head1 CITATION
180              
181             If you use Bio::Phylo in published research, please cite it:
182              
183             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
184             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
185             I<BMC Bioinformatics> B<12>:63.
186             L<http://dx.doi.org/10.1186/1471-2105-12-63>
187              
188             =cut
189              
190             1;