File Coverage

Bio/TreeIO/NewickParser.pm
Criterion Covered Total %
statement 115 121 95.0
branch 49 60 81.6
condition 5 6 83.3
subroutine 7 7 100.0
pod 0 2 0.0
total 176 196 89.8


line stmt bran cond sub pod time code
1             # POD documentation - main docs before the code
2              
3             =head1 NAME
4              
5             Module which implements a newick string parser as a finite state
6             machine which enables it to parse the full Newick specification.
7              
8             Taken largely from the Ensembl Compara file with the same name
9             (Bio::EnsEMBL::Compara::Graph::NewickParser), this module adapts the
10             parser to work with BioPerl's event handler-based parsing scheme.
11              
12             This module is used by nhx.pm and newick.pm, and is NOT called
13             directly. Instead, both of those parsing modules extend this module in
14             order to gain access to the main parsing method.
15              
16             =head1 SYNOPSIS
17              
18             # From newick.pm
19             use base qw(Bio::TreeIO Bio::TreeIO::NewickParser);
20              
21             # in the next_tree method...
22             $self->parse_newick($_);
23              
24             =head1 DESCRIPTION
25              
26             This module correctly parses the Newick and NHX formats, sending calls
27             to the BioPerl TreeEventHandler when appropriate in order to build and
28             populate the node objects.
29              
30             =head1 FEEDBACK
31              
32             =head2 Mailing Lists
33              
34             User feedback is an integral part of the evolution of this and other
35             Bioperl modules. Send your comments and suggestions preferably to the
36             Bioperl mailing list. Your participation is much appreciated.
37              
38             bioperl-l@bioperl.org - General discussion
39             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40              
41             =head2 Support
42              
43             Please direct usage questions or support issues to the mailing list:
44              
45             I
46              
47             rather than to the module maintainer directly. Many experienced and
48             reponsive experts will be able look at the problem and quickly
49             address it. Please include a thorough description of the problem
50             with code and data examples if at all possible.
51              
52             =head2 Reporting Bugs
53              
54             Report bugs to the Bioperl bug tracking system to help us keep track
55             of the bugs and their resolution. Bug reports can be submitted via the
56             web:
57              
58             https://github.com/bioperl/bioperl-live/issues
59              
60             =head1 AUTHOR - Jessica Severin (EnsEMBL implementation), Greg Jordan (BioPerl adaptation)
61              
62             =cut
63              
64             package Bio::TreeIO::NewickParser;
65              
66 16     16   72 use strict;
  16         17  
  16         432  
67 16     16   46 use base qw(Bio::Root::Root);
  16         16  
  16         13462  
68              
69             sub parse_newick {
70 153     153 0 165 my $self = shift;
71 153         170 my $newick = shift;
72              
73 153 100       450 $newick = $newick . ";" unless ($newick =~ m/;/);
74              
75 153         134 my $count=1;
76 153         321 my $debug = $self->verbose;
77 153         300 my $token = next_token(\$newick, "(;");
78 153         169 my $state=1;
79 153         136 my $bracket_level = 0;
80              
81 153         366 $self->_start('tree');
82              
83 153         243 my $leaf_flag = 0;
84              
85 153         307 while(defined($token)) {
86             # backwards-compat. with 5.8.1, no Switch (but we hate if-elsif-ad-infinitum
87 24255 100       41660 if ($state == 1) { #new node
    100          
    100          
    100          
    50          
    0          
88              
89 4851         6073 $self->_start('node');
90              
91 4851         10349 $self->debug(" -> [$token]\n");
92 4851 100       6752 if($token eq '(') { #create new set
93 2308 50       2782 $self->debug(" create set\n") if($debug);
94 2308         2647 $token = next_token(\$newick, "[(:,)");
95 2308         1708 $state = 1;
96 2308         3274 $bracket_level++;
97             } else {
98 2543         1934 $state = 2;
99 2543         3509 $leaf_flag = 1;
100             }
101             } elsif ($state == 2) { #naming a node
102 4851 100       10052 if(!($token =~ /[\[\:\,\)\;]/)) {
103              
104 2632 100 100     4422 if (!$leaf_flag && $self->param('internal_node_id') eq 'bootstrap') {
105 4         8 $self->_start('bootstrap');
106 4         7 $self->_chars($token);
107 4         8 $self->_end('bootstrap');
108 4         7 $token = '';
109             }
110              
111 2632         3149 $self->_start('id');
112 2632         4078 $self->_chars($token);
113 2632         3778 $self->_end('id');
114              
115 2632 50       4481 $self->debug(" naming leaf\n") if ($debug);
116 2632         3220 $token = next_token(\$newick, "[:,);");
117             }
118 4851         6780 $state = 3;
119             } elsif ($state == 3) { # optional : and distance
120 4851 100       5936 if($token eq ':') {
    100          
121 4325         4926 $token = next_token(\$newick, "[,);");
122              
123 4325         6325 $self->_start('branch_length');
124 4325         6761 $self->_chars($token);
125 4325         5936 $self->_end('branch_length');
126              
127 4325         7084 $token = next_token(\$newick, ",);"); #move to , or )
128             } elsif ($token eq '[') { # NHX tag without previous blength
129 15         25 $token .= next_token(\$newick, ",);");
130             }
131 4851         7703 $state = 4;
132             } elsif ($state == 4) { # optional NHX tags
133 4851 100       10246 if($token =~ /\[\&\&NHX/) {
    100          
134             # careful: this regexp gets rid of all NHX wrapping in one step
135              
136 478         643 $self->_start('nhx_tag');
137 478         1426 $token =~ /\[\&\&NHX\:(\S+)\]/;
138 478 50       1182 if ($1) {
139             # NHX may be empty, presumably at end of file, just before ";"
140 478         833 my @attributes = split ':', $1;
141 478         490 foreach my $attribute (@attributes) {
142 778         904 $attribute =~ s/\s+//;
143 778         1382 my($key,$value) = split '=', $attribute;
144              
145 778         1158 $self->_start('tag_name');
146 778         1153 $self->_chars($key);
147 778         1017 $self->_end('tag_name');
148              
149 778         1118 $self->_start('tag_value');
150 778         1051 $self->_chars($value);
151 778         1042 $self->_end('tag_value');
152             }
153             }
154 478         585 $self->_end('nhx_tag');
155              
156 478         732 $token = next_token(\$newick, ",);"); #move to , or )
157             } elsif ($token =~ /\[/) {
158             # This is a hack to make AMPHORA2 work
159 4 50       10 if ($token =~ /\[(\S+)\]/) {
160 4         8 $self->_start('bootstrap');
161 4         5 $self->_chars($1);
162 4         104 $self->_end('bootstrap');
163             }
164 4         9 $token = next_token(\$newick, ",);"); }
165 4851         6045 $state = 5;
166             } elsif ($state == 5) { # end node
167 4851 100       6775 if($token eq ')') {
    100          
    50          
168              
169 2308         3053 $self->_end('node');
170              
171 2308         3901 $token = next_token(\$newick, "[:,);");
172 2308 100 66     7051 if (defined $token && $token eq '[') {
173             # It is possible to have anonymous internal nodes w/ no name
174             # and no blength but with NHX tags
175             #
176             # We use leaf_flag=0 to let the parser know that it's labeling an internal
177             # node. This affects how potential bootstrap values are handled in state 2.
178 8         13 $leaf_flag = 0;
179 8         12 $state = 2;
180             } else {
181 2300         1743 $leaf_flag = 0;
182 2300         1648 $state = 2;
183             }
184 2308         3608 $bracket_level--;
185             } elsif($token eq ',') {
186              
187 2390         3064 $self->_end('node');
188              
189 2390         4110 $token = next_token(\$newick, "[(:,)"); #can be un_blengthed nhx nodes
190 2390         4293 $state=1;
191             } elsif($token eq ';') {
192             #done with tree
193 153 50       340 $self->throw("parse error: unbalanced ()\n") if($bracket_level ne 0);
194              
195 153         282 $self->_end('node');
196 153         341 $self->_end('tree');
197              
198 153         306 $token = next_token(\$newick, "(");
199 153         360 $state=13;
200             } else {
201 0         0 $self->debug("[$token]]\n");
202 0         0 $self->throw("parse error: expected ; or ) or ,\n");
203             }
204             } elsif ($state == 13) {
205 0         0 $self->throw("parse error: nothing expected after ;");
206             }
207             }
208              
209 153 50       278 if ($self->_eventHandler->within_element('tree')) {
210 0         0 $self->_end('node');
211 0         0 $self->_end('tree');
212             }
213             }
214              
215             sub _chars {
216 8521     8521   6293 my $self = shift;
217 8521         6229 my $chars = shift;
218              
219 8521         10160 $self->_eventHandler->characters($chars);
220             }
221              
222             sub _start {
223 14003     14003   10043 my $self = shift;
224 14003         9751 my $name = shift;
225              
226 14003         18319 $self->_eventHandler->start_element({Name=>$name});
227             }
228              
229             sub _end {
230 14003     14003   10515 my $self = shift;
231 14003         9987 my $name = shift;
232              
233 14003         16857 $self->_eventHandler->end_element({Name=>$name});
234             }
235              
236             sub next_token {
237 19091     19091 0 13933 my $string = shift;
238 19091         13058 my $delim = shift;
239            
240 19091         28079 $$string =~ s/^(\s)+//;
241              
242 19091 100       24817 return undef unless(length($$string));
243            
244             #print("input =>$$string\n");
245             #print("delim =>$delim\n");
246 18938         13126 my $index=undef;
247              
248 18938         54844 my @delims = split(/ */, $delim);
249 18938         19797 foreach my $dl (@delims) {
250 80262         72259 my $pos = index($$string, $dl);
251 80262 100       88791 if($pos>=0) {
252 65273 100       71293 $index = $pos unless(defined($index));
253 65273 100       81828 $index = $pos if($pos<$index);
254             }
255             }
256 18938 50       21834 unless(defined($index)) {
257             # have to call as class here (this is not an instance method)
258 0         0 Bio::Root::Root->throw("couldn't find delimiter $delim\n $$string");
259             }
260              
261 18938         14917 my $token ='';
262              
263 18938 100       20595 if($index==0) {
264 11499         11963 $token = substr($$string,0,1);
265 11499         14827 $$string = substr($$string, 1);
266             } else {
267 7439         7498 $token = substr($$string, 0, $index);
268 7439         9153 $$string = substr($$string, $index);
269             }
270              
271             #print(" token =>$token\n");
272             #print(" outstring =>$$string\n\n");
273            
274 18938         29568 return $token;
275             }
276              
277              
278              
279             1;