File Coverage

Bio/TreeIO/NewickParser.pm
Criterion Covered Total %
statement 115 121 95.0
branch 49 60 81.6
condition 5 6 83.3
subroutine 7 7 100.0
pod 0 2 0.0
total 176 196 89.8


line stmt bran cond sub pod time code
1             # POD documentation - main docs before the code
2              
3             =head1 NAME
4              
5             Module which implements a newick string parser as a finite state
6             machine which enables it to parse the full Newick specification.
7              
8             Taken largely from the Ensembl Compara file with the same name
9             (Bio::EnsEMBL::Compara::Graph::NewickParser), this module adapts the
10             parser to work with BioPerl's event handler-based parsing scheme.
11              
12             This module is used by nhx.pm and newick.pm, and is NOT called
13             directly. Instead, both of those parsing modules extend this module in
14             order to gain access to the main parsing method.
15              
16             =head1 SYNOPSIS
17              
18             # From newick.pm
19             use base qw(Bio::TreeIO Bio::TreeIO::NewickParser);
20              
21             # in the next_tree method...
22             $self->parse_newick($_);
23              
24             =head1 DESCRIPTION
25              
26             This module correctly parses the Newick and NHX formats, sending calls
27             to the BioPerl TreeEventHandler when appropriate in order to build and
28             populate the node objects.
29              
30             =head1 FEEDBACK
31              
32             =head2 Mailing Lists
33              
34             User feedback is an integral part of the evolution of this and other
35             Bioperl modules. Send your comments and suggestions preferably to the
36             Bioperl mailing list. Your participation is much appreciated.
37              
38             bioperl-l@bioperl.org - General discussion
39             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40              
41             =head2 Support
42              
43             Please direct usage questions or support issues to the mailing list:
44              
45             I
46              
47             rather than to the module maintainer directly. Many experienced and
48             reponsive experts will be able look at the problem and quickly
49             address it. Please include a thorough description of the problem
50             with code and data examples if at all possible.
51              
52             =head2 Reporting Bugs
53              
54             Report bugs to the Bioperl bug tracking system to help us keep track
55             of the bugs and their resolution. Bug reports can be submitted via the
56             web:
57              
58             https://github.com/bioperl/bioperl-live/issues
59              
60             =head1 AUTHOR - Jessica Severin (EnsEMBL implementation), Greg Jordan (BioPerl adaptation)
61              
62             =cut
63              
64             package Bio::TreeIO::NewickParser;
65              
66 16     16   73 use strict;
  16         18  
  16         390  
67 16     16   45 use base qw(Bio::Root::Root);
  16         15  
  16         13698  
68              
69             sub parse_newick {
70 153     153 0 157 my $self = shift;
71 153         163 my $newick = shift;
72              
73 153 100       461 $newick = $newick . ";" unless ($newick =~ m/;/);
74              
75 153         152 my $count=1;
76 153         295 my $debug = $self->verbose;
77 153         284 my $token = next_token(\$newick, "(;");
78 153         161 my $state=1;
79 153         133 my $bracket_level = 0;
80              
81 153         374 $self->_start('tree');
82              
83 153         225 my $leaf_flag = 0;
84              
85 153         264 while(defined($token)) {
86             # backwards-compat. with 5.8.1, no Switch (but we hate if-elsif-ad-infinitum
87 24255 100       41295 if ($state == 1) { #new node
    100          
    100          
    100          
    50          
    0          
88              
89 4851         6322 $self->_start('node');
90              
91 4851         10444 $self->debug(" -> [$token]\n");
92 4851 100       6884 if($token eq '(') { #create new set
93 2308 50       3109 $self->debug(" create set\n") if($debug);
94 2308         2654 $token = next_token(\$newick, "[(:,)");
95 2308         1905 $state = 1;
96 2308         3643 $bracket_level++;
97             } else {
98 2543         1921 $state = 2;
99 2543         3480 $leaf_flag = 1;
100             }
101             } elsif ($state == 2) { #naming a node
102 4851 100       9826 if(!($token =~ /[\[\:\,\)\;]/)) {
103              
104 2632 100 100     4617 if (!$leaf_flag && $self->param('internal_node_id') eq 'bootstrap') {
105 4         8 $self->_start('bootstrap');
106 4         6 $self->_chars($token);
107 4         13 $self->_end('bootstrap');
108 4         4 $token = '';
109             }
110              
111 2632         3188 $self->_start('id');
112 2632         4411 $self->_chars($token);
113 2632         3917 $self->_end('id');
114              
115 2632 50       4396 $self->debug(" naming leaf\n") if ($debug);
116 2632         3237 $token = next_token(\$newick, "[:,);");
117             }
118 4851         6768 $state = 3;
119             } elsif ($state == 3) { # optional : and distance
120 4851 100       5814 if($token eq ':') {
    100          
121 4325         5167 $token = next_token(\$newick, "[,);");
122              
123 4325         6297 $self->_start('branch_length');
124 4325         6768 $self->_chars($token);
125 4325         5974 $self->_end('branch_length');
126              
127 4325         7045 $token = next_token(\$newick, ",);"); #move to , or )
128             } elsif ($token eq '[') { # NHX tag without previous blength
129 15         23 $token .= next_token(\$newick, ",);");
130             }
131 4851         7442 $state = 4;
132             } elsif ($state == 4) { # optional NHX tags
133 4851 100       10212 if($token =~ /\[\&\&NHX/) {
    100          
134             # careful: this regexp gets rid of all NHX wrapping in one step
135              
136 478         593 $self->_start('nhx_tag');
137 478         1596 $token =~ /\[\&\&NHX\:(\S+)\]/;
138 478 50       1173 if ($1) {
139             # NHX may be empty, presumably at end of file, just before ";"
140 478         921 my @attributes = split ':', $1;
141 478         516 foreach my $attribute (@attributes) {
142 778         959 $attribute =~ s/\s+//;
143 778         1504 my($key,$value) = split '=', $attribute;
144              
145 778         1137 $self->_start('tag_name');
146 778         1207 $self->_chars($key);
147 778         1123 $self->_end('tag_name');
148              
149 778         1243 $self->_start('tag_value');
150 778         1157 $self->_chars($value);
151 778         1037 $self->_end('tag_value');
152             }
153             }
154 478         606 $self->_end('nhx_tag');
155              
156 478         755 $token = next_token(\$newick, ",);"); #move to , or )
157             } elsif ($token =~ /\[/) {
158             # This is a hack to make AMPHORA2 work
159 4 50       14 if ($token =~ /\[(\S+)\]/) {
160 4         6 $self->_start('bootstrap');
161 4         7 $self->_chars($1);
162 4         100 $self->_end('bootstrap');
163             }
164 4         9 $token = next_token(\$newick, ",);"); }
165 4851         5927 $state = 5;
166             } elsif ($state == 5) { # end node
167 4851 100       6808 if($token eq ')') {
    100          
    50          
168              
169 2308         3311 $self->_end('node');
170              
171 2308         4216 $token = next_token(\$newick, "[:,);");
172 2308 100 66     7245 if (defined $token && $token eq '[') {
173             # It is possible to have anonymous internal nodes w/ no name
174             # and no blength but with NHX tags
175             #
176             # We use leaf_flag=0 to let the parser know that it's labeling an internal
177             # node. This affects how potential bootstrap values are handled in state 2.
178 8         11 $leaf_flag = 0;
179 8         10 $state = 2;
180             } else {
181 2300         1685 $leaf_flag = 0;
182 2300         1703 $state = 2;
183             }
184 2308         3487 $bracket_level--;
185             } elsif($token eq ',') {
186              
187 2390         3165 $self->_end('node');
188              
189 2390         4163 $token = next_token(\$newick, "[(:,)"); #can be un_blengthed nhx nodes
190 2390         4177 $state=1;
191             } elsif($token eq ';') {
192             #done with tree
193 153 50       590 $self->throw("parse error: unbalanced ()\n") if($bracket_level ne 0);
194              
195 153         289 $self->_end('node');
196 153         341 $self->_end('tree');
197              
198 153         310 $token = next_token(\$newick, "(");
199 153         358 $state=13;
200             } else {
201 0         0 $self->debug("[$token]]\n");
202 0         0 $self->throw("parse error: expected ; or ) or ,\n");
203             }
204             } elsif ($state == 13) {
205 0         0 $self->throw("parse error: nothing expected after ;");
206             }
207             }
208              
209 153 50       277 if ($self->_eventHandler->within_element('tree')) {
210 0         0 $self->_end('node');
211 0         0 $self->_end('tree');
212             }
213             }
214              
215             sub _chars {
216 8521     8521   6356 my $self = shift;
217 8521         6441 my $chars = shift;
218              
219 8521         10366 $self->_eventHandler->characters($chars);
220             }
221              
222             sub _start {
223 14003     14003   10463 my $self = shift;
224 14003         9570 my $name = shift;
225              
226 14003         18836 $self->_eventHandler->start_element({Name=>$name});
227             }
228              
229             sub _end {
230 14003     14003   10459 my $self = shift;
231 14003         9447 my $name = shift;
232              
233 14003         17207 $self->_eventHandler->end_element({Name=>$name});
234             }
235              
236             sub next_token {
237 19091     19091 0 13988 my $string = shift;
238 19091         13093 my $delim = shift;
239            
240 19091         28455 $$string =~ s/^(\s)+//;
241              
242 19091 100       24921 return undef unless(length($$string));
243            
244             #print("input =>$$string\n");
245             #print("delim =>$delim\n");
246 18938         12578 my $index=undef;
247              
248 18938         54098 my @delims = split(/ */, $delim);
249 18938         19281 foreach my $dl (@delims) {
250 80262         73994 my $pos = index($$string, $dl);
251 80262 100       89624 if($pos>=0) {
252 65276 100       70636 $index = $pos unless(defined($index));
253 65276 100       82482 $index = $pos if($pos<$index);
254             }
255             }
256 18938 50       22798 unless(defined($index)) {
257             # have to call as class here (this is not an instance method)
258 0         0 Bio::Root::Root->throw("couldn't find delimiter $delim\n $$string");
259             }
260              
261 18938         13977 my $token ='';
262              
263 18938 100       20353 if($index==0) {
264 11499         11576 $token = substr($$string,0,1);
265 11499         15286 $$string = substr($$string, 1);
266             } else {
267 7439         7651 $token = substr($$string, 0, $index);
268 7439         9191 $$string = substr($$string, $index);
269             }
270              
271             #print(" token =>$token\n");
272             #print(" outstring =>$$string\n\n");
273            
274 18938         28490 return $token;
275             }
276              
277              
278              
279             1;