File Coverage

blib/lib/Tree/Parser.pm
Criterion Covered Total %
statement 121 124 97.5
branch 57 62 91.9
condition 24 32 75.0
subroutine 21 22 95.4
pod 13 13 100.0
total 236 253 93.2


line stmt bran cond sub pod time code
1              
2             package Tree::Parser;
3              
4 7     7   213940 use strict;
  7         20  
  7         290  
5 7     7   43 use warnings;
  7         11  
  7         432  
6              
7             our $VERSION = '0.15';
8              
9 7     7   46 use Scalar::Util qw(blessed);
  7         19  
  7         992  
10              
11 7     7   27241 use Tree::Simple;
  7         37672  
  7         59  
12 7     7   7520 use Array::Iterator;
  7         8421  
  7         32093  
13              
14             ### constructor
15            
16             sub new {
17 18     18 1 28966 my ($_class, $input) = @_;
18 18   33     112 my $class = ref($_class) || $_class;
19 18         42 my $tree_parser = {};
20 18         45 bless($tree_parser, $class);
21 18         72 $tree_parser->_init($input);
22 18         653 return $tree_parser;
23             }
24              
25             sub _init {
26 18     18   53 my ($self, $input) = @_;
27             # make slots for our 2 filters
28 18         71 $self->{parse_filter} = undef;
29 18         232 $self->{deparse_filter} = undef;
30 18         34 $self->{deparse_filter_cleanup} = undef;
31             # check the input and decide what to
32             # do with it
33 18 100       73 if ($input) {
34             # we accept a Tree::Simple object
35             # and expect then it to be deparsed
36 15 100 100     105 if (blessed($input) && $input->isa("Tree::Simple")) {
37 2         6 $self->{iterator} = undef;
38 2         18 $self->{tree} = $input;
39             }
40             # or we can take a number of types of input
41             # see prepareInput below
42             else {
43 13         53 $self->{iterator} = $self->prepareInput($input);
44 13         664 $self->{tree} = Tree::Simple->new(Tree::Simple->ROOT);
45             }
46             }
47             # if no input is given we create
48             # an empty tree a no iterator
49             else {
50 3         6 $self->{iterator} = undef;
51 3         23 $self->{tree} = Tree::Simple->new(Tree::Simple->ROOT);
52             }
53             }
54              
55             ### methods
56              
57             sub setFileEncoding {
58 0     0 1 0 my ($self, $file_encoding) = @_;
59 0 0       0 (defined($file_encoding)) || die "Insufficient Arguments : file_encoding must be defined";
60 0         0 $self->{file_encoding} = $file_encoding;
61             }
62              
63             sub setInput {
64 5     5 1 2206 my ($self, $input) = @_;
65 5 100       28 (defined($input)) || die "Insufficient Arguments : input undefined";
66 4         16 $self->{iterator} = $self->prepareInput($input);
67             }
68              
69             # prepareInput accepts any of the follow
70             # types of arguments:
71             # - a .tree file
72             # - an array reference of lines
73             # - a single string of code (can have embedded newlines)
74             # and then returns an iterator.
75             # references will be stringified, unless they are array references or
76             # Array::Iterator objects.
77             sub prepareInput {
78 17     17 1 30 my ($self, $input) = @_;
79            
80             # already an A:I instance
81 17 100 66     93 return $input
82             if blessed($input) and $input->isa('Array::Iterator');
83              
84             # a simple array
85 16 100       55 return Array::Iterator->new($input)
86             if ref($input) eq 'ARRAY';
87              
88             # stringifies to something that ends in .tree
89 15 100       57 if ($input =~ /\.tree$/) {
90 3 50       15 IS_A_FILE:
91             my $encoding = (defined $self->{file_encoding}
92             ? (":" . $self->{file_encoding})
93             : '');
94 3 100       168 open(TREE_FILE, ("<" . $encoding), $input) || die "cannot open file: $!";
95 2         73 my @lines = ;
96 2         133 close(TREE_FILE);
97 2         14 return Array::Iterator->new(@lines);
98             }
99             # everything else
100             else {
101 13         33 my @lines;
102 13 100       68 if ($input =~ /\n/) {
    100          
103 8         99 @lines = split /\n/ => $input;
104 8 50       34 (scalar(@lines) > 1)
105             || die "Incorrect Object Type : input looked like a single string, but only a single line ($input) unable to parse input into line (" . (join "==" => @lines) . ")";
106             }
107             elsif ($input =~ /^\(/) {
108 3         66 @lines = grep { $_ ne "" } split /(\(|\)|\s|\")/ => $input; #"
  94         354  
109             }
110             else {
111             # lets check if it is a file though
112 2 100       51 goto IS_A_FILE if -f $input;
113             # otherwise, croak on this sucker ...
114 1         35 die "Incorrect Object Type : input looked like a single string, but has no newlines or does not start with paren";
115             }
116 11         91 return Array::Iterator->new(@lines);
117             }
118             }
119              
120             ## ----------------------------------------------------------------------------
121             ## Filters
122             ## ----------------------------------------------------------------------------
123              
124             ## tab indented filters
125             ## ----------------------------------------------
126             {
127             my $TAB_INDENTED_PARSE = sub ($) {
128             my ($line_iterator) = @_;
129             my $line = $line_iterator->next();
130             my ($tabs, $node) = $line =~ /(\t*)(.*)/;
131             my $depth = length $tabs;
132             return ($depth, $node);
133             };
134            
135             my $TAB_INDENTED_DEPARSE = sub ($) {
136             my ($tree) = @_;
137             return ("\t" x $tree->getDepth()) . $tree->getNodeValue();
138             };
139            
140             sub useTabIndentedFilters {
141 2     2 1 1145 my ($self) = @_;
142 2         6 $self->{parse_filter} = $TAB_INDENTED_PARSE;
143 2         5 $self->{deparse_filter} = $TAB_INDENTED_DEPARSE;
144 2         7 $self->{deparse_filter_cleanup} = undef;
145             }
146             }
147              
148             ## space indented filters
149             ## ----------------------------------------------
150             {
151             my $make_SPACE_INDENTED_PARSE = sub {
152             my ($num_spaces) = @_;
153             return sub ($) {
154             my ($line_iterator) = @_;
155             my $line = $line_iterator->next();
156             my ($spaces, $node) = $line =~ /(\s*)(.*)/;
157             my $depth = (length($spaces) / $num_spaces) ;
158             return ($depth, $node);
159             };
160             };
161            
162             my $make_SPACE_INDENTED_DEPARSE = sub {
163             my ($num_spaces) = @_;
164             my $spaces = (" " x $num_spaces);
165             return sub ($) {
166             my ($tree) = @_;
167             return ($spaces x $tree->getDepth()) . $tree->getNodeValue();
168             };
169             };
170            
171             sub useSpaceIndentedFilters {
172 7     7 1 2640 my ($self, $num_spaces) = @_;
173 7   100     44 $num_spaces ||= 4;
174 7         77 $self->{parse_filter} = $make_SPACE_INDENTED_PARSE->($num_spaces);
175 7         23 $self->{deparse_filter} = $make_SPACE_INDENTED_DEPARSE->($num_spaces);
176 7         20 $self->{deparse_filter_cleanup} = undef;
177             }
178             }
179              
180             ## space indented filters
181             ## ----------------------------------------------
182             {
183              
184             my @default_level_identifiers = (1 .. 100);
185              
186             my $make_DOT_SEPERATED_LEVEL_PARSE = sub {
187             my (@level_identifiers) = @_;
188             @level_identifiers = @default_level_identifiers unless @level_identifiers;
189             return sub {
190             my ($line_iterator) = @_;
191             my $line = $line_iterator->next();
192             my $level_identifiers_reg_ex = join "|" => @level_identifiers;
193             my ($numbers, $value) = $line =~ /([($level_identifiers_reg_ex)\.]*)\s(.*)/;
194             # now split the numbers
195             my @numbers = split /\./ => $numbers;
196             # we know the depth of the tree by home many
197             # numbers are present, and we assume we were
198             # given them in sequential order anyway
199             my $depth = $#numbers;
200             return ($depth, $value);
201             };
202             };
203            
204             my $make_DOT_SEPERATED_LEVEL_DEPARSE = sub {
205             my (@level_identifiers) = @_;
206             @level_identifiers = @default_level_identifiers unless @level_identifiers;
207             return sub {
208             my ($tree) = @_;
209             my @numbers = $level_identifiers[$tree->getIndex()];
210             my $current_tree = $tree->getParent();
211             until ($current_tree->isRoot()) {
212             unshift @numbers => $level_identifiers[$current_tree->getIndex()];
213             $current_tree = $current_tree->getParent();
214             }
215             return ((join "." => @numbers) . " " . $tree->getNodeValue());
216             };
217             };
218            
219             sub useDotSeparatedLevelFilters {
220 2     2 1 1227 my ($self, @level_identifiers) = @_;
221 2         8 $self->{parse_filter} = $make_DOT_SEPERATED_LEVEL_PARSE->(@level_identifiers);
222 2         6 $self->{deparse_filter} = $make_DOT_SEPERATED_LEVEL_DEPARSE->(@level_identifiers);
223 2         6 $self->{deparse_filter_cleanup} = undef;
224             }
225            
226             *useDotSeperatedLevelFilters = \&useDotSeparatedLevelFilters;
227              
228             }
229              
230             ## nested parens filters
231             ## ----------------------------------------------
232             {
233            
234             my $make_NESTED_PARENS_PARSE = sub {
235             my @paren_stack;
236             return sub {
237             my ($line_iterator) = @_;
238             my $line = $line_iterator->next();
239             my $node = "";
240             while (!$node && $node ne 0) {
241             if ($line eq "(") {
242             push @paren_stack => $line;
243             last unless $line_iterator->hasNext();
244             $line = $line_iterator->next();
245             }
246             elsif ($line eq ")") {
247             pop @paren_stack;
248             last unless $line_iterator->hasNext();
249             $line = $line_iterator->next();
250             }
251             elsif ($line eq '"') {
252             $line = ""; # clear the quote
253             while ($line_iterator->hasNext()) {
254             my $next = $line_iterator->next();
255             last if $next eq '"';
256             $line .= $next;
257             }
258             }
259             elsif ($line eq ' ') {
260             # discard misc whitespace
261             $line = $line_iterator->next();
262             next;
263             }
264             else {
265             $node = $line;
266             }
267             }
268             my $depth = $#paren_stack;
269             $depth = 0 if $depth < 0;
270             return ($depth, $node);
271             };
272             };
273              
274             # this is used in clean up as well
275             my $prev_depth;
276             my $NESTED_PARENS_DEPARSE = sub {
277             my ($tree) = @_;
278             my $output = "";
279             unless (defined($prev_depth)) {
280             $output .= "(";
281             $prev_depth = $tree->getDepth();
282             }
283             else {
284             my $current_depth = $tree->getDepth();
285             if ($prev_depth == $current_depth) {
286             $output .= " ";
287             }
288             elsif ($prev_depth < $current_depth) {
289             $output .= " (";
290             }
291             elsif ($prev_depth > $current_depth) {
292             my $delta = $prev_depth - $current_depth;
293             $output .= ")" x $delta . " ";
294             }
295             $prev_depth = $current_depth;
296             }
297             my $current_node = $tree->getNodeValue();
298             $current_node = '"' . $current_node . '"' if $current_node =~ /\s/;
299             $output .= $current_node;
300             return $output;
301             };
302            
303             my $NESTED_PARENS_CLEANUP = sub {
304             my $closing_parens = $prev_depth;
305             # unset this so it can be used again
306             undef $prev_depth;
307             return @_, (")" x ($closing_parens + 1))
308             };
309            
310             sub useNestedParensFilters {
311 4     4 1 2083 my ($self) = @_;
312 4         14 $self->{parse_filter} = $make_NESTED_PARENS_PARSE->();
313 4         10 $self->{deparse_filter} = $NESTED_PARENS_DEPARSE;
314 4         14 $self->{deparse_filter_cleanup} = $NESTED_PARENS_CLEANUP;
315             }
316             }
317              
318             ## manual filters
319             ## ----------------------------------------------
320             # a filter is a subroutine reference
321             # which gets executed upon each line
322             # and it must return two values:
323             # - the depth of the node
324             # - the value of the node (which can
325             # be anything; string, array ref,
326             # object instanace, you name it)
327             # NOTE:
328             # if a filter is not specified, then
329             # the parsers iterator is expected to
330             # return the dual values.
331              
332             sub setParseFilter {
333 9     9 1 4902 my ($self, $filter) = @_;
334 9 100 100     90 (defined($filter) && ref($filter) eq "CODE")
335             || die "Insufficient Arguments : parse filter must be a code reference";
336 6         73 $self->{parse_filter} = $filter;
337             }
338              
339             sub setDeparseFilter {
340 5     5 1 1582 my ($self, $filter) = @_;
341 5 100 100     69 (defined($filter) && ref($filter) eq "CODE")
342             || die "Insufficient Arguments : parse filter must be a code reference";
343 2         6 $self->{deparse_filter} = $filter;
344             }
345              
346             ## ----------------------------------------------------------------------------
347              
348             sub getTree {
349 1     1 1 6 my ($self) = @_;
350 1         3 return $self->{tree};
351             }
352              
353             # deparse creates either:
354             # - an array of lines
355             # - or one large string
356             # which contains the values
357             # created by the sub ref
358             # (unfilter) passed as an argument
359             sub deparse {
360 9     9 1 3587 my ($self) = @_;
361 9 100       91 (defined($self->{deparse_filter}))
362             || die "Parse Error : no deparse filter is specified";
363 8 100       38 (!$self->{tree}->isLeaf())
364             || die "Parse Error : Tree is a leaf node, cannot de-parse a tree that has not be created yet";
365 7         108 return $self->_deparse();
366             }
367              
368             # parser front end
369             sub parse {
370 20     20 1 673 my ($self) = @_;
371 20 100       71 (defined($self->{parse_filter}))
372             || die "Parse Error : No parse filter is specified to parse with";
373 19 100       58 (defined($self->{iterator}))
374             || die "Parse Error : no input has yet been defined, there is nothing to parse";
375 18         54 return $self->_parse();
376             }
377              
378             ## private methods
379              
380             sub _deparse {
381 7     7   14 my ($self) = @_;
382 7         35 my @lines;
383             $self->{tree}->traverse(sub {
384 52     52   1051 my ($tree) = @_;
385 52         116 push @lines => $self->{deparse_filter}->($tree);
386 7         59 });
387 7 100       171 @lines = $self->{deparse_filter_cleanup}->(@lines) if defined $self->{deparse_filter_cleanup};
388             return wantarray ?
389             @lines
390 7 100       55 :
391             join("\n" => @lines);
392             }
393              
394             # private method which parses given
395             # an iterator and a tree
396             sub _parse {
397 18     18   28 my ($self) = @_;
398 18         51 my $tree_type = ref($self->{tree});
399 18         43 my ($i, $current_tree) = ($self->{iterator}, $self->{tree});
400 18         67 while ($i->hasNext()) {
401 134         1503 my ($depth, $node) = $self->{parse_filter}->($i);
402             # if we get nothing back and the iterator
403             # is exhausted, then we now it is time to
404             # stop parsing the input.
405 134 100 66     1042 last if !$depth && !$node && !$i->hasNext();
      66        
406             # depth must be defined ...
407 131 100 100     1095 (defined($depth)
    100          
408             &&
409             # and a digit (int or float)
410             ($depth =~ /^\d+(\.\d*)?$/o)
411             # otherwise we throw and exception
412             ) || die "Parse Error : Incorrect Value for depth (" . ((defined $depth) ? $depth : "undef") . ")";
413             # and node is fine as long as it is defined
414 129 100       270 (defined($node)) || die "Parse Error : node is not defined";
415            
416 128         147 my $new_tree;
417             # if we get back a tree of the same type,
418             # or even of a different type, but still
419             # a Tree::Simple, then we use that ....
420 128 100 33     466 if (blessed($node) && ($node->isa($tree_type) || $node->isa('Tree::Simple'))) {
      66        
421 9         13 $new_tree = $node;
422             }
423             # othewise, we assume it is intended to be
424             # the node of the tree
425             else {
426 119         354 $new_tree = $tree_type->new($node);
427             }
428            
429 128 100       3680 if ($current_tree->isRoot()) {
430 15         163 $current_tree->addChild($new_tree);
431 15         1330 $current_tree = $new_tree;
432 15         57 next;
433             }
434 113         908 my $tree_depth = $current_tree->getDepth();
435 113 100       598 if ($depth == $tree_depth) {
    100          
    50          
436 38         108 $current_tree->addSibling($new_tree);
437 38         3081 $current_tree = $new_tree;
438             }
439             elsif ($depth > $tree_depth) {
440 47 100       129 (($depth - $tree_depth) <= 1)
441             || die "Parse Error : the difference between the depth ($depth) and the tree depth ($tree_depth) is too much (" . ($depth - $tree_depth) . ") at '$node'";
442 46         291 $current_tree->addChild($new_tree);
443 46         10081 $current_tree = $new_tree;
444             }
445             elsif ($depth < $tree_depth) {
446 28         90 $current_tree = $current_tree->getParent() while ($depth < $current_tree->getDepth());
447 28         367 $current_tree->addSibling($new_tree);
448 28         1982 $current_tree = $new_tree;
449             }
450            
451             }
452 14         185 return $self->{tree};
453             }
454              
455             1;
456              
457             __END__