File Coverage

Bio/TreeIO/NewickParser.pm
Criterion Covered Total %
statement 115 121 95.0
branch 49 60 81.6
condition 5 6 83.3
subroutine 7 7 100.0
pod 0 2 0.0
total 176 196 89.8


line stmt bran cond sub pod time code
1             # POD documentation - main docs before the code
2              
3             =head1 NAME
4              
5             Module which implements a newick string parser as a finite state
6             machine which enables it to parse the full Newick specification.
7              
8             Taken largely from the Ensembl Compara file with the same name
9             (Bio::EnsEMBL::Compara::Graph::NewickParser), this module adapts the
10             parser to work with BioPerl's event handler-based parsing scheme.
11              
12             This module is used by nhx.pm and newick.pm, and is NOT called
13             directly. Instead, both of those parsing modules extend this module in
14             order to gain access to the main parsing method.
15              
16             =head1 SYNOPSIS
17              
18             # From newick.pm
19             use base qw(Bio::TreeIO Bio::TreeIO::NewickParser);
20              
21             # in the next_tree method...
22             $self->parse_newick($_);
23              
24             =head1 DESCRIPTION
25              
26             This module correctly parses the Newick and NHX formats, sending calls
27             to the BioPerl TreeEventHandler when appropriate in order to build and
28             populate the node objects.
29              
30             =head1 FEEDBACK
31              
32             =head2 Mailing Lists
33              
34             User feedback is an integral part of the evolution of this and other
35             Bioperl modules. Send your comments and suggestions preferably to the
36             Bioperl mailing list. Your participation is much appreciated.
37              
38             bioperl-l@bioperl.org - General discussion
39             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40              
41             =head2 Support
42              
43             Please direct usage questions or support issues to the mailing list:
44              
45             I
46              
47             rather than to the module maintainer directly. Many experienced and
48             reponsive experts will be able look at the problem and quickly
49             address it. Please include a thorough description of the problem
50             with code and data examples if at all possible.
51              
52             =head2 Reporting Bugs
53              
54             Report bugs to the Bioperl bug tracking system to help us keep track
55             of the bugs and their resolution. Bug reports can be submitted via the
56             web:
57              
58             https://github.com/bioperl/bioperl-live/issues
59              
60             =head1 AUTHOR - Jessica Severin (EnsEMBL implementation), Greg Jordan (BioPerl adaptation)
61              
62             =cut
63              
64             package Bio::TreeIO::NewickParser;
65              
66 15     15   100 use strict;
  15         52  
  15         403  
67 15     15   59 use base qw(Bio::Root::Root);
  15         26  
  15         13728  
68              
69             sub parse_newick {
70 126     126 0 226 my $self = shift;
71 126         271 my $newick = shift;
72              
73 126 100       477 $newick = $newick . ";" unless ($newick =~ m/;/);
74              
75 126         202 my $count=1;
76 126         355 my $debug = $self->verbose;
77 126         374 my $token = next_token(\$newick, "(;");
78 126         227 my $state=1;
79 126         195 my $bracket_level = 0;
80              
81 126         477 $self->_start('tree');
82              
83 126         297 my $leaf_flag = 0;
84              
85 126         293 while(defined($token)) {
86             # backwards-compat. with 5.8.1, no Switch (but we hate if-elsif-ad-infinitum
87 23135 100       46032 if ($state == 1) { #new node
    100          
    100          
    100          
    50          
    0          
88              
89 4627         9627 $self->_start('node');
90              
91 4627         16093 $self->debug(" -> [$token]\n");
92 4627 100       9194 if($token eq '(') { #create new set
93 2223 50       3910 $self->debug(" create set\n") if($debug);
94 2223         3962 $token = next_token(\$newick, "[(:,)");
95 2223         3034 $state = 1;
96 2223         4050 $bracket_level++;
97             } else {
98 2404         3190 $state = 2;
99 2404         3742 $leaf_flag = 1;
100             }
101             } elsif ($state == 2) { #naming a node
102 4627 100       13625 if(!($token =~ /[\[\:\,\)\;]/)) {
103              
104 2488 100 100     5659 if (!$leaf_flag && $self->param('internal_node_id') eq 'bootstrap') {
105 4         17 $self->_start('bootstrap');
106 4         18 $self->_chars($token);
107 4         16 $self->_end('bootstrap');
108 4         12 $token = '';
109             }
110              
111 2488         5074 $self->_start('id');
112 2488         6627 $self->_chars($token);
113 2488         6280 $self->_end('id');
114              
115 2488 50       5761 $self->debug(" naming leaf\n") if ($debug);
116 2488         5056 $token = next_token(\$newick, "[:,);");
117             }
118 4627         7801 $state = 3;
119             } elsif ($state == 3) { # optional : and distance
120 4627 100       7411 if($token eq ':') {
    100          
121 4131         6577 $token = next_token(\$newick, "[,);");
122              
123 4131         9364 $self->_start('branch_length');
124 4131         10771 $self->_chars($token);
125 4131         9277 $self->_end('branch_length');
126              
127 4131         10013 $token = next_token(\$newick, ",);"); #move to , or )
128             } elsif ($token eq '[') { # NHX tag without previous blength
129 15         42 $token .= next_token(\$newick, ",);");
130             }
131 4627         8424 $state = 4;
132             } elsif ($state == 4) { # optional NHX tags
133 4627 100       12335 if($token =~ /\[\&\&NHX/) {
    100          
134             # careful: this regexp gets rid of all NHX wrapping in one step
135              
136 478         1093 $self->_start('nhx_tag');
137 478         2349 $token =~ /\[\&\&NHX\:(\S+)\]/;
138 478 50       1721 if ($1) {
139             # NHX may be empty, presumably at end of file, just before ";"
140 478         1286 my @attributes = split ':', $1;
141 478         880 foreach my $attribute (@attributes) {
142 778         1464 $attribute =~ s/\s+//;
143 778         2007 my($key,$value) = split '=', $attribute;
144              
145 778         1894 $self->_start('tag_name');
146 778         1860 $self->_chars($key);
147 778         1716 $self->_end('tag_name');
148              
149 778         2005 $self->_start('tag_value');
150 778         2130 $self->_chars($value);
151 778         1531 $self->_end('tag_value');
152             }
153             }
154 478         1156 $self->_end('nhx_tag');
155              
156 478         1221 $token = next_token(\$newick, ",);"); #move to , or )
157             } elsif ($token =~ /\[/) {
158             # This is a hack to make AMPHORA2 work
159 4 50       29 if ($token =~ /\[(\S+)\]/) {
160 4         18 $self->_start('bootstrap');
161 4         19 $self->_chars($1);
162 4         16 $self->_end('bootstrap');
163             }
164 4         20 $token = next_token(\$newick, ",);"); }
165 4627         7593 $state = 5;
166             } elsif ($state == 5) { # end node
167 4627 100       8770 if($token eq ')') {
    100          
    50          
168              
169 2223         5013 $self->_end('node');
170              
171 2223         5769 $token = next_token(\$newick, "[:,);");
172 2223 100 66     7958 if (defined $token && $token eq '[') {
173             # It is possible to have anonymous internal nodes w/ no name
174             # and no blength but with NHX tags
175             #
176             # We use leaf_flag=0 to let the parser know that it's labeling an internal
177             # node. This affects how potential bootstrap values are handled in state 2.
178 8         15 $leaf_flag = 0;
179 8         16 $state = 2;
180             } else {
181 2215         2718 $leaf_flag = 0;
182 2215         2532 $state = 2;
183             }
184 2223         4038 $bracket_level--;
185             } elsif($token eq ',') {
186              
187 2278         4966 $self->_end('node');
188              
189 2278         6565 $token = next_token(\$newick, "[(:,)"); #can be un_blengthed nhx nodes
190 2278         4724 $state=1;
191             } elsif($token eq ';') {
192             #done with tree
193 126 50       336 $self->throw("parse error: unbalanced ()\n") if($bracket_level ne 0);
194              
195 126         324 $self->_end('node');
196 126         404 $self->_end('tree');
197              
198 126         399 $token = next_token(\$newick, "(");
199 126         380 $state=13;
200             } else {
201 0         0 $self->debug("[$token]]\n");
202 0         0 $self->throw("parse error: expected ; or ) or ,\n");
203             }
204             } elsif ($state == 13) {
205 0         0 $self->throw("parse error: nothing expected after ;");
206             }
207             }
208              
209 126 50       350 if ($self->_eventHandler->within_element('tree')) {
210 0         0 $self->_end('node');
211 0         0 $self->_end('tree');
212             }
213             }
214              
215             sub _chars {
216 8183     8183   9370 my $self = shift;
217 8183         9822 my $chars = shift;
218              
219 8183         12740 $self->_eventHandler->characters($chars);
220             }
221              
222             sub _start {
223 13414     13414   14552 my $self = shift;
224 13414         14386 my $name = shift;
225              
226 13414         24096 $self->_eventHandler->start_element({Name=>$name});
227             }
228              
229             sub _end {
230 13414     13414   15297 my $self = shift;
231 13414         14965 my $name = shift;
232              
233 13414         22714 $self->_eventHandler->end_element({Name=>$name});
234             }
235              
236             sub next_token {
237 18223     18223 0 20365 my $string = shift;
238 18223         19456 my $delim = shift;
239            
240 18223         41446 $$string =~ s/^(\s)+//;
241              
242 18223 100       29833 return undef unless(length($$string));
243            
244             #print("input =>$$string\n");
245             #print("delim =>$delim\n");
246 18097         19554 my $index=undef;
247              
248 18097         69022 my @delims = split(/ */, $delim);
249 18097         29636 foreach my $dl (@delims) {
250 76720         89838 my $pos = index($$string, $dl);
251 76720 100       99677 if($pos>=0) {
252 62666 100       80420 $index = $pos unless(defined($index));
253 62666 100       91836 $index = $pos if($pos<$index);
254             }
255             }
256 18097 50       26439 unless(defined($index)) {
257             # have to call as class here (this is not an instance method)
258 0         0 Bio::Root::Root->throw("couldn't find delimiter $delim\n $$string");
259             }
260              
261 18097         22175 my $token ='';
262              
263 18097 100       25526 if($index==0) {
264 10996         17218 $token = substr($$string,0,1);
265 10996         21290 $$string = substr($$string, 1);
266             } else {
267 7101         10916 $token = substr($$string, 0, $index);
268 7101         13030 $$string = substr($$string, $index);
269             }
270              
271             #print(" token =>$token\n");
272             #print(" outstring =>$$string\n\n");
273            
274 18097         39266 return $token;
275             }
276              
277              
278              
279             1;