File Coverage

blib/lib/Data/DFA.pm
Criterion Covered Total %
statement 362 383 94.5
branch 70 96 72.9
condition 10 18 55.5
subroutine 69 71 97.1
pod 29 31 93.5
total 540 599 90.1


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -I/home/phil/perl/cpan/DataNFA/lib/
2             #-------------------------------------------------------------------------------
3             # Deterministic finite state parser from a regular expression.
4             # Philip R Brenan at gmail dot com, Appa Apps Ltd Inc., 2018-2020
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             package Data::DFA;
8             our $VERSION = 20200705;
9             require v5.26;
10 1     1   867 use warnings FATAL => qw(all);
  1         8  
  1         37  
11 1     1   5 use strict;
  1         2  
  1         33  
12 1     1   6 use Carp qw(confess);
  1         2  
  1         83  
13 1     1   536 use Data::Dump qw(dump);
  1         7803  
  1         59  
14 1     1   729 use Data::NFA;
  1         153378  
  1         129  
15 1     1   43 use Data::Table::Text qw(:all);
  1         3  
  1         1418  
16 1     1   9 use feature qw(current_sub say);
  1         9  
  1         5685  
17              
18             # dfa: {state=>state name, transitions=>{symbol=>state}, final state=>{reduction rule=>1}, pumps=>[[pumping lemmas]]}
19              
20             my $logFile = q(/home/phil/z/z/z/zzz.txt); # Log printed results if developing
21              
22             #D1 Construct regular expression # Construct a regular expression that defines the language to be parsed using the following combining operations:
23              
24             sub element($) #S One element.
25 54     54 1 292 {my ($label) = @_; # Transition symbol
26 54         127 &Data::NFA::element(@_);
27             }
28              
29             sub sequence(@) #S Sequence of elements.
30 20     20 1 133 {my (@elements) = @_; # Elements
31 20         64 &Data::NFA::sequence(@_);
32             }
33              
34             sub optional(@) #S An optional sequence of element.
35 6     6 1 67 {my (@element) = @_; # Elements
36 6         34 &Data::NFA::optional(@_);
37             }
38              
39             sub zeroOrMore(@) #S Zero or more repetitions of a sequence of elements.
40 30     30 1 205 {my (@element) = @_; # Elements
41 30         116 &Data::NFA::zeroOrMore(@_);
42             }
43              
44             sub oneOrMore(@) #S One or more repetitions of a sequence of elements.
45 33     33 1 279 {my (@element) = @_; # Elements
46 33         133 &Data::NFA::oneOrMore(@_);
47             }
48              
49             sub choice(@) #S Choice from amongst one or more elements.
50 22     22 1 180 {my (@elements) = @_; # Elements to be chosen from
51 22         85 &Data::NFA::choice(@_);
52             }
53              
54             sub except(@) #S Choice from amongst all symbols except the ones mentioned
55 1     1 1 12 {my (@elements) = @_; # Elements to be chosen from
56 1         5 &Data::NFA::except(@_);
57             }
58              
59             #1 Deterministic finite state parser # Create a deterministic finite state automaton to parse sequences of symbols in the language defined by a regular expression.
60              
61             sub newDFA #P Create a new DFA.
62 136     136 1 441 {genHash(q(Data::DFA)); # DFA State
63             }
64              
65             sub newState(%) #P Create a new DFA state with the specified options.
66 495     495 1 1194 {my (%options) = @_; # DFA state as hash
67              
68 495         1197 my $r = genHash(q(Data::DFA::State), # DFA State
69             state => undef, # Name of the state - the join of the NFA keys
70             nfaStates => undef, # Hash whose keys are the NFA states that contributed to this super state
71             transitions => undef, # Transitions from this state
72             final => undef, # Whether this state is final
73             pump => undef, # Pumping lemmas for this state
74             sequence => undef, # Sequence of states to final state minus pumped states
75             );
76              
77 495         37436 %$r = (%$r, %options);
78              
79 495         2860 $r
80             }
81              
82             sub fromNfa($) #P Create a DFA parser from an NFA.
83 38     38 1 183338 {my ($nfa) = @_; # Nfa
84              
85 38         134 my $dfa = newDFA; # A DFA is a hash of states
86              
87 38         560 my @nfaStates = (0, $nfa->statesReachableViaJumps(0)->@*); # Nfa states reachable from the start state
88 38         11644 my $initialSuperState = join ' ', sort @nfaStates; # Initial super state
89              
90             $$dfa{$initialSuperState} = newState( # Start state
91             state => $initialSuperState, # Name of the state - the join of the NFA keys
92 222         439 nfaStates => {map{$_=>1} @nfaStates}, # Hash whose keys are the NFA states that contributed to this super state
93 38         131 final => finalState($nfa, {map {$_=>1} @nfaStates}), # Whether this state is final
  222         421  
94             );
95              
96 38         256 $dfa->superStates($initialSuperState, $nfa); # Create DFA superstates from states reachable from the start state
97              
98 38         171 my $r = $dfa->renumberDfa($initialSuperState); # Renumber
99 38         171 my $d = $r->removeDuplicatedStates; # Remove duplicate states
100 38         198 my $u = $d->removeUnreachableStates; # Remove unreachable states
101 38         110 my $R = $u->renumberDfa(0); # Renumber again
102              
103 38         1255 $R # Resulting Dfa
104             }
105              
106             sub fromExpr(@) #S Create a DFA parser from a regular B<@expression>.
107 37     37 1 311 {my (@expression) = @_; # Regular expression
108 37         151 fromNfa(Data::NFA::fromExpr(@expression))
109             }
110              
111             sub finalState($$) #P Check whether, in the specified B<$nfa>, any of the states named in the hash reference B<$reach> are final. Final states that refer to reduce rules are checked for reduce conflicts.
112 152     152 1 331 {my ($nfa, $reach) = @_; # NFA, hash of states in the NFA
113 152         217 my @final; # Reduction rule
114              
115 152         517 for my $state(sort keys %$reach) # Each state we can reach
116 947 100       16626 {if (my $f = $nfa->isFinal($state))
117 219         5594 {push @final, $f;
118             }
119             }
120              
121 152 100       2823 @final ? \@final : undef; # undef if not final else reduction rules
122             }
123              
124             sub superState($$$$$) #P Create super states from existing superstate.
125 152     152 1 359 {my ($dfa, $superStateName, $nfa, $symbols, $nfaSymbolTransitions) = @_; # DFA, start state in DFA, NFA we are converting, symbols in the NFA we are converting, states reachable from each state by symbol
126 152         267 my $superState = $$dfa{$superStateName};
127 152         2637 my $nfaStates = $superState->nfaStates;
128              
129 152         575 my @created; # New super states created
130 152         321 for my $symbol(@$symbols) # Each symbol
131 496         1440 {my $reach = {}; # States in NFS reachable from start state in dfa
132 496         1672 for my $nfaState(sort keys %$nfaStates) # Each NFA state in the dfa start state
133 3113 50       5476 {if (my $r = $$nfaSymbolTransitions{$nfaState}{$symbol}) # States in the NFA reachable on the symbol
134 3113         4336 {map{$$reach{$_}++} @$r; # Accumulate NFA reachable NFA states
  7349         11193  
135             }
136             }
137              
138 496 100       1342 if (keys %$reach) # Current symbol takes us somewhere
139 229         1140 {my $newSuperStateName = join ' ', sort keys %$reach; # Name of the super state reached from the start state via the current symbol
140 229 100       643 if (!$$dfa{$newSuperStateName}) # Super state does not exists so create it
141 114         301 {my $newState = $$dfa{$newSuperStateName} = newState
142             (nfaStates => $reach,
143             transitions => undef,
144             final => finalState($nfa, $reach),
145             );
146 114         263 push @created, $newSuperStateName; # Find all its transitions
147             }
148 229         4371 $$dfa{$superStateName}->transitions->{$symbol} = $newSuperStateName;
149             }
150             }
151              
152             @created
153 152         1332 }
154              
155             sub superStates($$$) #P Create super states from existing superstate.
156 38     38 1 108 {my ($dfa, $SuperStateName, $nfa) = @_; # DFA, start state in DFA, NFA we are tracking
157 38         172 my $symbols = [$nfa->symbols]; # Symbols in nfa
158 38         12489 my $transitions = $nfa->allTransitions; # Precompute transitions in the NFA
159              
160 38         327487 my @fix = ($SuperStateName);
161 38         148 while(@fix) # Create each superstate as the set of all nfa states we could be in after each transition on a symbol
162 152         380 {push @fix, superState($dfa, pop @fix, $nfa, $symbols, $transitions);
163             }
164             }
165              
166             sub transitionOnSymbol($$$) #P The super state reached by transition on a symbol from a specified state.
167 75     75 1 146 {my ($dfa, $superStateName, $symbol) = @_; # DFA, start state in DFA, symbol
168 75         141 my $superState = $$dfa{$superStateName};
169 75         1208 my $transitions = $superState->transitions;
170              
171 75         304 $$transitions{$symbol}
172             } # transitionOnSymbol
173              
174             sub renumberDfa($$) #P Renumber the states in the specified B<$dfa>.
175 98     98 1 252 {my ($dfa, $initialStateName) = @_; # DFA, initial super state name
176 98         151 my %rename;
177 98         229 my $cfa = newDFA;
178              
179 98         1118 $rename{$initialStateName} = 0; # The start state is always 0 in the dfa
180 98         356 for my $s(sort keys %$dfa) # Each state
181 343 100       777 {$rename{$s} = keys %rename if !exists $rename{$s}; # Rename state
182             }
183              
184 98         299 for my $superStateName(sort keys %$dfa) # Each state
185 343         1582 {my $sourceState = $rename{$superStateName};
186 343         905 my $s = $$cfa{$sourceState} = newState;
187 343         569 my $superState = $$dfa{$superStateName};
188              
189 343         6541 my $transitions = $superState->transitions;
190 343         1933 for my $symbol(sort keys %$transitions) # Rename the target of every transition
191 484         8585 {$s->transitions->{$symbol} = $rename{$$transitions{$symbol}};
192             }
193              
194 343         6521 $s->final = $superState->final;
195             }
196              
197             $cfa
198 98         687 } # renumberDfa
199              
200             sub univalent($) # Check that the L is univalent: a univalent L has a mapping from symbols to states. Returns a hash showing the mapping from symbols to states if the L is univalent, else returns B.
201 5     5 0 18 {my ($dfa) = @_; # Dfa to check
202              
203 5         9 my %symbols; # Symbol to states
204 5         36 for my $state(sort keys %$dfa) # Each state name
205 19         319 {my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
206 19         101 for my $symbol(sort keys %$transitions) # Each transition from the state being checked
207 25         39 {my $target = $$transitions{$symbol}; # New state reached by transition from state being checked
208 25         64 $symbols{$symbol}{$target}++; # Count targets for symbol
209             }
210             }
211              
212 5         14 my @multiple; my %single; # Symbols with multiple targets, single targets
213 5         19 for my $symbol(sort keys %symbols) # Symbols
214 12         44 {my @states = sort keys $symbols{$symbol}->%*; # States
215 12 100       33 if (@states == 1) # Univalent
216 10         28 {($single{$symbol}) = @states; # Mapping
217             }
218             else # Multivalent
219 2         6 {push @multiple, $symbol
220             }
221             }
222              
223 5 50 33     81 dumpFile($logFile, \%single) if -e $logFile and !@multiple; # Log the result if requested
224              
225 5 100       62 @multiple ? undef : \%single # Only return the mapping if it is valid
226             } # univalent
227              
228             #D1 Print # Pritn the Dfa in various ways.
229              
230             sub printFinal($) #P Print a final state
231 77     77 1 137 {my ($final) = @_; # final State
232 77         103 my %f;
233 77         160 for my $f(@$final)
234 126 50       279 {$f{ref($f) ? $f->print->($f) : $f}++;
235             }
236 77         376 join ' ', sort keys %f;
237             }
238              
239             sub print($;$) # Print the specified B<$dfa> using the specified B<$title>.
240 15     15 1 66 {my ($dfa, $title) = @_; # DFA, optional title
241              
242 15         36 my @out;
243 15         91 for my $superStateName(sort {$a <=> $b} keys %$dfa) # Each state
  72         133  
244 52         100 {my $superState = $$dfa{$superStateName};
245 52         901 my $transitions = $superState->transitions;
246 52         934 my $Final = $superState->final;
247 52 100       390 if (my @s = sort keys %$transitions) # Transitions present
248 50         87 {my $s = $s[0];
249 50         124 my $S = $dfa->transitionOnSymbol($superStateName, $s);
250 50         805 my $final = $$dfa{$S}->final;
251 50 100       275 push @out, [$superStateName, $Final ? 1 : q(), $s, $$transitions{$s},
252             printFinal($final)];
253 50         197 for(1..$#s)
254 25         52 {my $s = $s[$_];
255 25         53 my $S = $dfa->transitionOnSymbol($superStateName, $s);
256 25         406 my $final = $$dfa{$S}->final;
257 25         115 push @out, ['', '', $s, $$transitions{$s}, printFinal($final)];
258             }
259             }
260             else # No transitions present
261 2 50       9 {push @out, [$superStateName, $Final ? 1 : q(), q(), q(), printFinal($Final)];
262             }
263             }
264              
265             my $r = sub # Format results as a table
266 15 50   15   54 {if (@out)
267 15         118 {my $t = formatTable([@out], [qw(State Final Symbol Target Final)])."\n";
268 15 100       12374 my $s = $title ? "$title\n$t" : $t;
269 15         463 $s =~ s(\s*\Z) ()gs;
270 15         414 $s =~ s(\s*\n) (\n)gs;
271 15         74 return "$s\n";
272             }
273 0         0 "$title: No states in Dfa";
274 15         153 }->();
275              
276 15 50       541 owf($logFile, $r) if -e $logFile; # Log the result if requested
277 15         199 $r # Return the result
278             }
279              
280             sub symbols($) # Return an array of all the symbols accepted by a B<$dfa>.
281 2     2 1 7 {my ($dfa) = @_; # DFA
282 2         4 my %symbols;
283 2         8 for my $superState(values %$dfa) # Each state
284 10         192 {my $transitions = $superState->transitions;
285 10         71 $symbols{$_}++ for sort keys %$transitions; # Symbol for each transition
286             }
287              
288 2         33 sort keys %symbols;
289             }
290              
291             sub parser($;$) # Create a parser from a B<$dfa> constructed from a regular expression.
292 104     104 1 277 {my ($dfa, $observer) = @_; # Deterministic finite state automaton, optional observer
293 104         392 return genHash(q(Data::DFA::Parser), # Parse a sequence of symbols with a DFA
294             dfa => $dfa, # DFA being used
295             state => 0, # Current state
296             fail => undef, # Symbol on which we failed
297             observer => $observer, # Optional sub($parser, $symbol, $target) to observe transitions.
298             processed => [], # Symbols processed
299             );
300             }
301              
302             sub dumpAsJson($) # Create a JSON string representing a B<$dfa>.
303 0     0 1 0 {my ($dfa) = @_; # Deterministic finite state automaton generated from an expression
304 0         0 my $jfa;
305 0         0 for my $state(sort keys %$dfa) # Each state
306 0         0 {my $transitions = $$dfa{$state}->transitions; # Transitions
307 0         0 for my $t(sort keys %$transitions)
308 0         0 {$$jfa{transitions}{$state}{$t} = $$transitions{$t}; # Clone transitions
309             }
310 0         0 $$jfa{finalStates}{$state} = $$dfa{$state}->final; # Final states
311             }
312 0         0 encodeJson($jfa);
313             }
314              
315             sub removeDuplicatedStates($) #P Remove duplicated states in a B<$dfa>.
316 38     38 1 98 {my ($dfa) = @_; # Deterministic finite state automaton generated from an expression
317 38         57 my $deleted; # Deleted state count
318              
319 38         130 for(1..100) # Keep squeezing out duplicates
320 60         106 {my %d;
321 60         210 for my $state(sort keys %$dfa) # Each state
322 221         470 {my $s = $$dfa{$state}; # State
323             # my $c = dump([$s->transitions, $s->final]); # State content
324 221 100       4505 my $c = dump([$s->transitions, $s->final ? 1 : 0]); # State content - we only need to know if the state is final or not
325 221         73578 push $d{$c}->@*, $state;
326             }
327              
328 60         134 my %m; # Map deleted duplicated states back to undeleted original
329 60         149 for my $d(values %d) # Delete unitary states
330 191         354 {my ($b, @d) = $d->@*;
331 191 100       408 if (@d)
332 23         51 {for my $r(@d) # Map duplicated states to base unduplicated state
333 30         68 {$m{$r} = $b; # Map
334 30         113 delete $$dfa{$r}; # Remove duplicated state from DFA
335 30         66 ++$deleted;
336             }
337             }
338             }
339              
340 60 100       146 if (keys %m) # Remove duplicate states
341 22         85 {for my $state(values %$dfa) # Each state
342 69         1225 {my $transitions = $state->transitions;
343 69         391 for my $symbol(sort keys %$transitions)
344 95         168 {my $s = $$transitions{$symbol};
345 95 100       230 if (defined $m{$s})
346 33         89 {$$transitions{$symbol} = $m{$s};
347             }
348             }
349             }
350             }
351 38         144 else {last};
352             }
353              
354 38 100       120 $deleted ? renumberDfa($dfa, 0) : $dfa; # Renumber states if necessary
355             } # removeDuplicatedStates
356              
357             sub removeUnreachableStates($) #P Remove unreachable states in a B<$dfa>.
358 38     38 1 93 {my ($dfa) = @_; # Deterministic finite state automaton generated from an expression
359 38         84 my $deleted = 0; # Count of deleted unreachable states
360 38         88 my %reachable; # States reachable from the start state
361             my %checked; # States that have been checked
362 38         0 my @check; # States to check
363              
364 38         172 my ($startState) = sort keys %$dfa; # Start state name
365 38         112 $reachable{$startState}++; # Mark start state as reachable
366 38         70 $checked{$startState}++; # Mark start state as checked
367 38         89 push @check, $startState; # Check start state
368              
369 38         101 while(@check) # Check each state reachable from the start state
370 122         266 {my $state = pop @check; # State to check
371 122         2155 for my $s(sort keys $$dfa{$state}->transitions->%*) # Target each transition from the state
372 160         3018 {my $t = $$dfa{$state}->transitions->{$s}; # Target state
373 160         688 $reachable{$t}++; # Mark target as reachable
374 160 100       576 push @check, $t unless $checked{$t}++; # Check states reachable from the target state unless already checked
375             }
376             }
377              
378 38         194 for my $s(sort keys %$dfa) # Each state
379 122 50       250 {if (!$reachable{$s}) # Unreachable state
380 0         0 {++$deleted; # Count unreachable states
381 0         0 delete $$dfa{$s}; # Remove unreachable state
382             }
383             }
384 38 50       130 my $r = $deleted ? renumberDfa($dfa, 0) : $dfa; # Renumber states if necessary
385 38         117 $r
386             }
387              
388             # Trace from the start node through to each node remembering the long path. If
389             # we reach a node we have already visited then add a pumping lemma to it as the
390             # long path minus the short path used to reach the node the first time.
391             #
392             # Trace from start node to final states without revisiting nodes. Write the
393             # expressions so obtained as a choice of sequences with pumping lemmas.
394              
395             sub printAsExpr2($%) #P Print a DFA B<$dfa_> in an expression form determined by the specified B<%options>.
396             {my ($dfa, %options) = @_; # Dfa, options.
397              
398             checkKeys(\%options, # Check options
399             {element => q(Format a single element),
400             choice => q(Format a choice of expressions),
401             sequence => q(Format a sequence of expressions),
402             zeroOrMore => q(Format zero or more instances of an expression),
403             oneOrMore => q(One or more instances of an expression),
404             });
405              
406             my ($fe, $fc, $fs, $fz, $fo) = @options{qw(element choice sequence zeroOrMore oneOrMore)};
407              
408             my %pumped; # States pumped => [symbols along pump]
409             my $pump; $pump = sub # Create pumping lemmas for each state
410 51     51   115 {my ($state, @path) = @_; # Current state, path to state
411 51 100       120 if (defined $pumped{$state}) # State already visited
412 17         66 {my @pump = @path[$pumped{$state}..$#path]; # Long path minus short path
413 17 50       319 push $$dfa{$state}->pump->@*, [@pump] if @pump; # Add the pumping lemma
414             }
415             else # State not visited
416 34         604 {my $transitions = $$dfa{$state}->transitions; # Transitions hash
417 34         150 $pumped{$state} = @path; # Record visit to this state
418 34         127 for my $t(sort keys %$transitions) # Visit each adjacent states
419 37         168 {&$pump($$transitions{$t}, @path, $t); # Visit adjacent state
420             }
421             }
422             };
423              
424             &$pump(0); # Find the pumping lemmas for each node
425              
426             my %visited; # States visited => [symbols along path to state]
427             my $visit; $visit = sub # Find non pumped paths
428 51     51   117 {my ($state, @path) = @_; # Current state, sequence to get here
429 51 100 100     715 if (@path and $$dfa{$state}->final) # Non empty path leads to final state
430 20         411 {push $$dfa{$state}->sequence->@*, [@path] # Record path as a sequence that leads to a final state
431             }
432 51 100       283 if (!defined $visited{$state}) # States not yet visited
433 34         586 {my $transitions = $$dfa{$state}->transitions; # Transitions hash
434 34         154 $visited{$state} = [@path];
435 34         103 for my $symbol(sort keys %$transitions) # Visit each adjacent states
436 37         63 {my $s = $$transitions{$symbol}; # Adjacent state
437 37         145 &$visit($$transitions{$symbol}, @path, [$state, $symbol, $s]); # Visit adjacent state
438             }
439 34         88 delete $visited{$state};
440             }
441             };
442              
443             &$visit(0); # Find unpumped paths
444              
445             my @choices; # Construct regular expression as choices amongst pumped paths
446             for my $s(sort keys %$dfa) # Each state name
447             {my $state = $$dfa{$s}; # Each state
448              
449             if ($state->final) # Final state
450             {for my $path($state->sequence->@*) # Each path to this state
451             {my @seq;
452              
453             for my $step(@$path) # Current state, sequence to get here
454             {my ($from, $symbol, $to) = @$step; # States not yet visited
455             push @seq, &$fe($symbol) unless $from == $to; # Add element unless looping in a final state
456              
457             if (my $pump = $$dfa{$to}->pump) # Add pumping lemmas
458             {my @c;
459             for my $p(@$pump) # Add each pumping lemma for this state
460             {if (@$p == 1) # Format one element
461             {push @c, &$fe($$p[0]);
462             }
463             else # Sequence of several elements
464             {push @c, &$fs(map {&$fe($_)} @$p);
465             }
466             }
467              
468             my sub insertPump($) # Insert a pumping path
469             {my ($c) = @_; # Pumping path, choice
470             my $z = &$fz($c);
471             if (@seq and $seq[-1] eq $c) # Sequence plus zero or more of same sequence is one or more of sequence
472             {$seq[-1] = &$fo($c)
473             }
474             elsif (!@seq or $seq[-1] ne $z && $seq[-1] ne &$fo($c)) # Suppress multiple zero or more or one and more followed by zero or more of the same item
475             {push @seq, $z
476             }
477             }
478              
479             if (@c == 1) {insertPump $c[0]} # Only one pumping lemma
480             else {insertPump &$fc(@c)} # Multiple pumping lemmas
481             }
482             }
483             push @choices, &$fs(@seq); # Combine choice of sequences from start state
484             }
485             }
486             };
487              
488             my sub debracket($) # Remove duplicated outer brackets
489             {my ($re) = @_; # Re
490             while(length($re) > 1 and substr($re, 0, 1) eq '(' and
491             substr($re, -1, 1) eq ')')
492             {$re = substr($re, 1, -1)
493             }
494             $re
495             }
496              
497             return debracket $choices[0] if @choices == 1; # No wrapping needed if only one choice
498             debracket &$fc(map {&$fs($_)} @choices)
499             } # printAsExpr2
500              
501             sub printAsExpr($) # Print a B<$dfa> as an expression.
502 4     4 1 10 {my ($dfa) = @_; # DFA
503              
504             my %options = # Formatting methods
505             (element => sub
506 14     14   22 {my ($e) = @_;
507 14         41 qq/element(q($e))/
508             },
509             choice => sub
510 2     2   10 {my $c = join ', ', @_;
511 2         7 qq/choice($c)/
512             },
513             sequence => sub
514 4     4   15 {my $s = join ', ', @_;
515 4         20 qq/sequence($s)/
516             },
517             zeroOrMore => sub
518 4     4   7 {my ($z) = @_;
519 4         13 qq/zeroOrMore($z)/
520             },
521             oneOrMore => sub
522 4     4   10 {my ($o) = @_;
523 4         17 qq/oneOrMore($o)/
524             },
525 4         62 );
526              
527 4         32 my $r = printAsExpr2($dfa, %options); # Create an expression for the DFA
528 4         8 if (1) # Remove any unnecessary outer sequence
529 4         10 {my $s = q/sequence(/;
530 4 50 33     26 if (substr($r, 0, length($s)) eq $s and substr($r, -1, 1) eq q/)/)
531 4         11 {$r = substr($r, length($s), -1)
532             }
533             }
534             $r
535 4         88 }
536              
537             sub printAsRe($) # Print a B<$dfa> as a regular expression.
538 10     10 1 25 {my ($dfa) = @_; # DFA
539              
540             my %options = # Formatting methods
541 56     56   95 (element => sub {my ($e) = @_; $e},
  56         129  
542             choice => sub
543 12     12   21 {my %c = map {$_=>1} @_;
  28         72  
544 12         50 my @c = sort keys %c;
545 12 100       96 return $c[0] if @c == 1;
546 2         6 my $c = join ' | ', @c;
547 2         9 qq/($c)/
548             },
549             sequence => sub
550 34 100   34   110 {return $_[0] if @_ == 1;
551 8         21 my $s = join ' ', @_;
552 8         27 qq/($s)/
553             },
554 24     24   40 zeroOrMore => sub {my ($z) = @_; qq/$z*/},
  24         65  
555 22     22   40 oneOrMore => sub {my ($z) = @_; qq/$z+/},
  22         126  
556 10         146 );
557              
558 10         72 printAsExpr2($dfa, %options); # Create an expression for the DFA
559             }
560              
561             sub parseDtdElementAST($) # Convert the Dtd Element definition in B<$string> to a parse tree.
562 3     3 1 9 {my ($string) = @_; # String representation of DTD element expression
563             package dtdElementDfa;
564 1     1   10 use Carp;
  1         2  
  1         124  
565 1     1   7 use Data::Dump qw(dump);
  1         2  
  1         76  
566 1     1   7 use Data::Table::Text qw(:all);
  1         2  
  1         1675  
567              
568             sub element($) # An element
569 11     11   23 {my ($e) = @_;
570 11         86 bless ['element', $e]
571             }
572              
573             sub multiply # Zero or more, one or more, optional
574 3     3   11 {my ($l, $r) = @_;
575             my $o = sub
576 3 50   3   15 {return q(zeroOrMore) if $r eq q(*);
577 0 0       0 return q(oneOrMore) if $r eq q(+);
578 0 0       0 return q(optional) if $r eq q(?);
579 0         0 confess "Unexpected multiplier $r";
580 3         18 }->();
581 3         23 bless [$o, $l];
582             }
583              
584             sub choice # Choice
585 2     2   8 {my ($l, $r) = @_;
586 2         10 bless ["choice", $l, $r];
587             }
588              
589             sub sequence # Sequence
590 6     6   12 {my ($l, $r) = @_;
591 6         42 bless ["sequence", $l, $r];
592             }
593              
594             use overload
595 1         24 '**' => \&multiply,
596             '*' => \&choice,
597 1     1   9 '+' => \&sequence;
  1         3  
598              
599             sub parse($) # Parse a string
600 3     3   8 {my ($S) = @_; # String
601 3         7 my $s = $S;
602 3         8 $s =~ s(#PCDATA) (PCDATA)gs;
603 3         48 $s =~ s(((\w|-)+)) (element(q($1)))gs; # Word
604 3         30 $s =~ s(\)\s*([*+?])) (\) ** q($1))gs;
605 3         13 $s =~ s(\|) (*)gs;
606 3         17 $s =~ s(,\s+) (+)gs;
607 3         349 my $r = eval $s;
608 3 50       19 say STDERR "$@\n$S\n$s\n" if $@;
609 3         15 $r
610             }
611              
612 3         11 parse($string) # Parse the DTD element expression into a tree
613             }
614              
615             sub parseDtdElement($) # Convert the L <>DTD> Element definition in the specified B<$string> to a DFA.
616 2     2 1 6 {my ($string) = @_; # DTD element expression string
617 2         7 fromExpr parseDtdElementAST($string) # Create a DFA from a parse tree created from the Dtd element expression string
618             }
619              
620             #D1 Paths # Find paths in a DFA.
621              
622             sub subArray($$) #P Whether the second array is contained within the first.
623 36     36 1 66 {my ($A, $B) = @_; # Exterior array, interior array
624 36 100       142 return 1 unless @$B; # The empty set is contained by every set
625 27         51 my @a = @$A;
626 27         49 my ($b, @b) = @$B; # Next element to match in the second array
627 27         50 while(@a) # Each start position in the first array
628 48         66 {my $a = shift @a;
629 48 100 100     135 return 1 if $a eq $b and __SUB__->([@a], [@b]) # Current position matches and remainder of second array is contained in the remainder of the first array
630             }
631             0
632 9         34 }
633              
634             sub removeLongerPathsThatContainShorterPaths($) #P Remove longer paths that contain shorter paths.
635 0     0 1 0 {my ($paths) = @_; # Paths
636 0         0 my @paths = sort keys %$paths; # Paths in definite order
637 0         0 for my $p(@paths) # Each long path
638 0 0       0 {next unless exists $$paths{$p}; # Long path still present
639 0         0 for my $q(@paths) # Each short path
640 0 0       0 {next unless exists $$paths{$q}; # Short path still present
641 0 0 0     0 delete $$paths{$p} if $p ne $q and subArray($$paths{$p}, $$paths{$q}); # Remove long path if it contains short path
642             }
643             }
644             }
645              
646             sub shortPaths($) # Find a set of paths that reach every state in the DFA with each path terminating in a final state.
647             {my ($dfa) = @_; # DFA
648             my %paths; # {path => [transitions]} the transitions in each path
649             my @path; # Transitions in the current path
650             my %seen; # {state} states already seen in this path
651              
652             my sub check($) # Check remaining states from the specified state
653             {my ($state) = @_; # Check from this state
654             $paths{join ' ', @path} = [@path] if $$dfa{$state}->final; # Save non repeating path at a final state
655             my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
656             for my $symbol(sort keys %$transitions) # Each transition from the state being checked not already seen
657             {my $new = $$transitions{$symbol}; # New state reached by transition from state being checked
658             push @path, $symbol; # Add transition to path
659             if (!$seen{$new}) # New state has not been visited yet
660             {$seen{$new}++; # Mark state as already been seen
661             __SUB__->($new); # Check from new state
662             delete $seen{$new} # Mark state as not already been seen
663             }
664             pop @path; # Remove current transition from path
665             }
666             }
667              
668             $seen{0}++; # Mark start state as seen
669             check(0); # Start at the start state
670              
671             dumpFile($logFile, \%paths) if -e $logFile; # Log the result if requested
672              
673             \%paths # Hash of non repeating paths
674             } # shortPaths
675              
676             sub longPaths($) # Find a set of paths that traverse each transition in the DFA with each path terminating in a final state.
677             {my ($dfa) = @_; # DFA
678             my %paths; # {path => [transitions]} the transitions in each path
679             my @path; # Transitions in the current path
680             my %seen; # {state} states already seen in this path so we can avoid loops
681              
682             my sub check($) # Check remaining states from the specified state
683             {my ($state) = @_; # Check from this state
684             $paths{join ' ', @path} = [@path] if $$dfa{$state}->final; # Save non repeating path at a final state
685             my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
686             for my $symbol(sort keys %$transitions) # Each transition from the state being checked not already seen
687             {my $new = $$transitions{$symbol}; # New state reached by transition from state being checked
688             if (!$seen{$state}{$symbol}++) # Mark state as already been seen
689             {push @path, $symbol; # Add transition to path
690             __SUB__->($new); # Check from new state
691             pop @path; # Remove current transition from path
692             delete $seen{$state}{$symbol} # Mark state as not already been seen
693             }
694             }
695             }
696              
697             check(0); # Start at the start state
698              
699             dumpFile($logFile, \%paths) if -e $logFile; # Log the result if requested
700              
701             \%paths # Hash of non repeating paths
702             } # longPaths
703              
704             sub loops($) # Find the non repeating loops from each state.
705             {my ($dfa) = @_; # DFA
706             my %loops; #{state=>[[non repeating loop through states]]}
707              
708             my sub loopsFromState($) # Find loops starting at this state
709             {my ($start) = @_; # Check from this state
710              
711             my @path; # Transitions in the current path
712             my %seen; # {state} states already seen in this path
713              
714             my sub check($) # Check remaining states from the specified state
715             {my ($state) = @_; # Check from this state
716             my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
717             for my $symbol(sort keys %$transitions) # Each transition from the state being checked not already seen
718             {my $new = $$transitions{$symbol}; # New state reached by transition from state being checked
719             push @path, $symbol; # Add transition to path
720             push $loops{$start}->@*, [@path] if $new == $start; # Save loop
721             if (!$seen{$new}) # New state has not been visited yet
722             {$seen{$new}++; # Mark state as already been seen
723             __SUB__->($new); # Check from new state
724             delete $seen{$new} # Mark state as not already been seen
725             }
726             pop @path; # Remove current transition from path
727             }
728             }
729              
730             $seen{$start}++; # Mark start state as seen
731             check($start); # Start at the start state
732             }
733              
734             loopsFromState($_) for sort keys %$dfa; # Loops from each state
735             dumpFile($logFile, \%loops) if -e $logFile; # Log the result if requested
736              
737             \%loops
738             } # loops
739              
740             #D1 Parser methods # Use the DFA to parse a sequence of symbols
741              
742             sub Data::DFA::Parser::accept($$) # Using the specified B<$parser>, accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message that such a move is not possible.
743 230     230   397 {my ($parser, $symbol) = @_; # DFA Parser, next symbol to be processed by the finite state automaton
744 230         4005 my $dfa = $parser->dfa; # Dfa
745 230         4282 my $observer = $parser->observer; # Optional observer
746 230         4144 my $transitions = $$dfa{$parser->state}->transitions; # Transitions for current state
747 230         4723 my $nextState = $$transitions{$symbol}; # Target state transitioned to
748 230 100       480 if (defined $nextState) # Valid target state
749 197 100       486 {$observer->($parser, $symbol, $nextState) if $observer; # Log transition if required
750 197         3140 $parser->state = $nextState; # Enter next state
751 197         701 push @{$parser->processed}, $symbol; # Save transition symbol
  197         2994  
752 197         1058 return 1; # Success
753             }
754             else # No such transition
755 33         181 {$parser->{next} = [my @next = sort keys %$transitions]; # Valid symbols
756 33         72 my @processed = @{$parser->processed}; # Symbols processed successfully
  33         562  
757 33         700 $parser->fail = $symbol; # Failing symbol
758              
759 33         206 push my @m, "Already processed: ". join(' ', @processed); # Create error message
760              
761 33 100       85 if (scalar(@next) > 0) # Expected
762 31         75 {push @m, "Expected one of : ". join(' ', @next);
763             }
764             else
765 2         5 {push @m, "Expected nothing more.";
766             }
767              
768 33         82 push @m, "But found : ". $symbol, ""; # Found
769              
770 33         242 die join "\n", @m;
771             }
772             }
773              
774             sub Data::DFA::Parser::final($) # Returns whether the specified B<$parser> is in a final state or not.
775 72     72   133 {my ($parser) = @_; # DFA Parser
776 72         1213 my $dfa = $parser->dfa;
777 72         1352 my $state = $parser->state;
778 72         1350 $$dfa{$state}->final
779             }
780              
781             sub Data::DFA::Parser::next($) # Returns an array of symbols that would be accepted in the current state by the specified B<$parser>.
782 1     1   3 {my ($parser) = @_; # DFA Parser
783 1         17 my $dfa = $parser->dfa;
784 1         21 my $state = $parser->state;
785 1         21 my $transitions = $$dfa{$state}->transitions;
786 1         13 sort keys %$transitions
787             }
788              
789             sub Data::DFA::Parser::accepts($@) # Confirm that the specified B<$parser> accepts an array representing a sequence of symbols.
790 103     103   7579 {my ($parser, @symbols) = @_; # DFA Parser, array of symbols
791 103         216 for my $symbol(@symbols) # Parse the symbols
792 227         395 {eval {$parser->accept($symbol)}; # Try to accept a symbol
  227         489  
793 227 50 66     676 confess "Error in observer: $@" if $@ and $@ !~ m(Already processed);
794 227 100       571 return 0 if $@; # Failed
795             }
796             $parser->final # Confirm we are in an end state
797 71         163 }
798              
799             #D1 Data Structures # Data structures used by this package.
800              
801             #D0
802             #-------------------------------------------------------------------------------
803             # Export
804             #-------------------------------------------------------------------------------
805              
806 1     1   2314 use Exporter qw(import);
  1         3  
  1         49  
807              
808 1     1   6 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  1         2  
  1         489  
809              
810             @ISA = qw(Exporter);
811             @EXPORT_OK = qw(
812             choice
813             fromExpr
814             fromNfa
815             element
816             except
817             oneOrMore optional
818             parser
819             print
820             sequence
821             zeroOrMore
822             );
823             %EXPORT_TAGS = (all=>[@EXPORT, @EXPORT_OK]);
824              
825             # podDocumentation
826              
827             =pod
828              
829             =encoding utf-8
830              
831             =head1 Name
832              
833             Data::DFA - Deterministic finite state parser from regular expression.
834              
835             =head1 Synopsis
836              
837             Create a deterministic finite state parser to recognize sequences of symbols
838             that match a given regular expression.
839              
840             To recognize sequences of symbols drawn from B<'a'..'e'> that match
841             the regular expression: B:
842              
843             Create the parser:
844              
845             my $dfa = fromExpr
846             ("a",
847             oneOrMore
848             (choice(qw(b c))),
849             optional("d"),
850             "e"
851             );
852              
853             Recognize sequences of symbols:
854              
855             ok $dfa->parser->accepts(qw(a b e));
856             ok $dfa->parser->accepts(qw(a b c e));
857             ok !$dfa->parser->accepts(qw(a d));
858             ok !$dfa->parser->accepts(qw(a c d));
859             ok $dfa->parser->accepts(qw(a c d e));
860              
861             Print the transition table:
862              
863             is_deeply $dfa->print("a(b|c)+d?e"), <
864             a(b|c)+d?e
865             State Final Symbol Target Final
866             1 0 a 4
867             2 1 b 1
868             3 c 1
869             4 d 2
870             5 e 3 1
871             6 2 e 3 1
872             7 3 1 1
873             8 4 b 1
874             9 c 1
875             END
876              
877             Discover why a sequence cannot be recognized:
878              
879             my $parser = $dfa->parser;
880              
881             eval { $parser->accept($_) } for qw(a b a);
882              
883             is_deeply $@, <
884             Already processed: a b
885             Expected one of : b c d e
886             But found : a
887             END
888              
889             is_deeply $parser->fail, qq(a);
890             is_deeply [$parser->next], [qw(b c d e)];
891             is_deeply $parser->processed, [qw(a b)];
892              
893             ok !$parser->final;
894              
895             To construct and parse regular expressions in the format used by B
896             definitions in Ls used to validate L:
897              
898             is_deeply
899             parseDtdElement(q(a, b*, c))->printAsExpr,
900             q/element(q(a)), zeroOrMore(element(q(b))), element(q(c))/;
901              
902             =head1 Description
903              
904             Deterministic finite state parser from regular expression.
905              
906              
907             Version 20200627.
908              
909              
910             The following sections describe the methods in each functional area of this
911             module. For an alphabetic listing of all methods by name see L.
912              
913              
914              
915             =head1 Construct regular expression
916              
917             Construct a regular expression that defines the language to be parsed using the following combining operations:
918              
919             =head2 element($label)
920              
921             One element.
922              
923             Parameter Description
924             1 $label Transition symbol
925              
926             B
927              
928              
929             my $dfa = fromExpr # Construct DFA
930             ("a",
931             oneOrMore(choice(qw(b c))),
932             optional("d"),
933             "e"
934             );
935             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
936              
937             my $dfa = fromExpr # Construct DFA
938             (𝗲𝗹𝗲𝗺𝗲𝗻𝘁("a"),
939             oneOrMore(choice(𝗲𝗹𝗲𝗺𝗲𝗻𝘁("b"), 𝗲𝗹𝗲𝗺𝗲𝗻𝘁("c"))),
940             optional(𝗲𝗹𝗲𝗺𝗲𝗻𝘁("d")),
941             𝗲𝗹𝗲𝗺𝗲𝗻𝘁("e")
942             );
943             my $parser = $dfa->parser; # New parser
944              
945             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
946              
947             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
948             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
949              
950             ok !$parser->final; # Not in a final state
951              
952             ok $dfa->dumpAsJson eq <
953             {
954             "finalStates" : {
955             "0" : null,
956             "1" : null,
957             "2" : null,
958             "4" : null,
959             "5" : 1
960             },
961             "transitions" : {
962             "0" : {
963             "a" : "2"
964             },
965             "1" : {
966             "b" : "1",
967             "c" : "1",
968             "d" : "4",
969             "e" : "5"
970             },
971             "2" : {
972             "b" : "1",
973             "c" : "1"
974             },
975             "4" : {
976             "e" : "5"
977             }
978             }
979             }
980             END
981              
982              
983             This is a static method and so should either be imported or invoked as:
984              
985             Data::DFA::element
986              
987              
988             =head2 sequence(@elements)
989              
990             Sequence of elements.
991              
992             Parameter Description
993             1 @elements Elements
994              
995             B
996              
997              
998             my $dfa = fromExpr # Construct DFA
999             (zeroOrMore(𝘀𝗲𝗾𝘂𝗲𝗻𝗰𝗲('a'..'c')),
1000             except('b'..'d')
1001             );
1002              
1003             ok $dfa->parser->accepts(qw(a b c a ));
1004             ok !$dfa->parser->accepts(qw(a b c a b));
1005             ok !$dfa->parser->accepts(qw(a b c a c));
1006             ok !$dfa->parser->accepts(qw(a c c a b c));
1007              
1008              
1009             ok $dfa->print(q(Test)) eq <
1010             Test
1011             State Final Symbol Target Final
1012             1 0 a 1 1
1013             2 1 1 b 2
1014             3 2 c 0
1015             END
1016              
1017              
1018             This is a static method and so should either be imported or invoked as:
1019              
1020             Data::DFA::sequence
1021              
1022              
1023             =head2 optional(@element)
1024              
1025             An optional sequence of element.
1026              
1027             Parameter Description
1028             1 @element Elements
1029              
1030             B
1031              
1032              
1033             my $dfa = fromExpr # Construct DFA
1034             ("a",
1035             oneOrMore(choice(qw(b c))),
1036             𝗼𝗽𝘁𝗶𝗼𝗻𝗮𝗹("d"),
1037             "e"
1038             );
1039             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1040              
1041             my $dfa = fromExpr # Construct DFA
1042             (element("a"),
1043             oneOrMore(choice(element("b"), element("c"))),
1044             𝗼𝗽𝘁𝗶𝗼𝗻𝗮𝗹(element("d")),
1045             element("e")
1046             );
1047             my $parser = $dfa->parser; # New parser
1048              
1049             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1050              
1051             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1052             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1053              
1054             ok !$parser->final; # Not in a final state
1055              
1056             ok $dfa->dumpAsJson eq <
1057             {
1058             "finalStates" : {
1059             "0" : null,
1060             "1" : null,
1061             "2" : null,
1062             "4" : null,
1063             "5" : 1
1064             },
1065             "transitions" : {
1066             "0" : {
1067             "a" : "2"
1068             },
1069             "1" : {
1070             "b" : "1",
1071             "c" : "1",
1072             "d" : "4",
1073             "e" : "5"
1074             },
1075             "2" : {
1076             "b" : "1",
1077             "c" : "1"
1078             },
1079             "4" : {
1080             "e" : "5"
1081             }
1082             }
1083             }
1084             END
1085              
1086              
1087             This is a static method and so should either be imported or invoked as:
1088              
1089             Data::DFA::optional
1090              
1091              
1092             =head2 zeroOrMore(@element)
1093              
1094             Zero or more repetitions of a sequence of elements.
1095              
1096             Parameter Description
1097             1 @element Elements
1098              
1099             B
1100              
1101              
1102             my $dfa = fromExpr # Construct DFA
1103             (𝘇𝗲𝗿𝗼𝗢𝗿𝗠𝗼𝗿𝗲(sequence('a'..'c')),
1104             except('b'..'d')
1105             );
1106              
1107             ok $dfa->parser->accepts(qw(a b c a ));
1108             ok !$dfa->parser->accepts(qw(a b c a b));
1109             ok !$dfa->parser->accepts(qw(a b c a c));
1110             ok !$dfa->parser->accepts(qw(a c c a b c));
1111              
1112              
1113             ok $dfa->print(q(Test)) eq <
1114             Test
1115             State Final Symbol Target Final
1116             1 0 a 1 1
1117             2 1 1 b 2
1118             3 2 c 0
1119             END
1120              
1121              
1122             This is a static method and so should either be imported or invoked as:
1123              
1124             Data::DFA::zeroOrMore
1125              
1126              
1127             =head2 oneOrMore(@element)
1128              
1129             One or more repetitions of a sequence of elements.
1130              
1131             Parameter Description
1132             1 @element Elements
1133              
1134             B
1135              
1136              
1137             my $dfa = fromExpr # Construct DFA
1138             ("a",
1139             𝗼𝗻𝗲𝗢𝗿𝗠𝗼𝗿𝗲(choice(qw(b c))),
1140             optional("d"),
1141             "e"
1142             );
1143             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1144              
1145             my $dfa = fromExpr # Construct DFA
1146             (element("a"),
1147             𝗼𝗻𝗲𝗢𝗿𝗠𝗼𝗿𝗲(choice(element("b"), element("c"))),
1148             optional(element("d")),
1149             element("e")
1150             );
1151             my $parser = $dfa->parser; # New parser
1152              
1153             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1154              
1155             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1156             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1157              
1158             ok !$parser->final; # Not in a final state
1159              
1160             ok $dfa->dumpAsJson eq <
1161             {
1162             "finalStates" : {
1163             "0" : null,
1164             "1" : null,
1165             "2" : null,
1166             "4" : null,
1167             "5" : 1
1168             },
1169             "transitions" : {
1170             "0" : {
1171             "a" : "2"
1172             },
1173             "1" : {
1174             "b" : "1",
1175             "c" : "1",
1176             "d" : "4",
1177             "e" : "5"
1178             },
1179             "2" : {
1180             "b" : "1",
1181             "c" : "1"
1182             },
1183             "4" : {
1184             "e" : "5"
1185             }
1186             }
1187             }
1188             END
1189              
1190              
1191             This is a static method and so should either be imported or invoked as:
1192              
1193             Data::DFA::oneOrMore
1194              
1195              
1196             =head2 choice(@elements)
1197              
1198             Choice from amongst one or more elements.
1199              
1200             Parameter Description
1201             1 @elements Elements to be chosen from
1202              
1203             B
1204              
1205              
1206             my $dfa = fromExpr # Construct DFA
1207             ("a",
1208             oneOrMore(𝗰𝗵𝗼𝗶𝗰𝗲(qw(b c))),
1209             optional("d"),
1210             "e"
1211             );
1212             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1213              
1214             my $dfa = fromExpr # Construct DFA
1215             (element("a"),
1216             oneOrMore(𝗰𝗵𝗼𝗶𝗰𝗲(element("b"), element("c"))),
1217             optional(element("d")),
1218             element("e")
1219             );
1220             my $parser = $dfa->parser; # New parser
1221              
1222             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1223              
1224             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1225             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1226              
1227             ok !$parser->final; # Not in a final state
1228              
1229             ok $dfa->dumpAsJson eq <
1230             {
1231             "finalStates" : {
1232             "0" : null,
1233             "1" : null,
1234             "2" : null,
1235             "4" : null,
1236             "5" : 1
1237             },
1238             "transitions" : {
1239             "0" : {
1240             "a" : "2"
1241             },
1242             "1" : {
1243             "b" : "1",
1244             "c" : "1",
1245             "d" : "4",
1246             "e" : "5"
1247             },
1248             "2" : {
1249             "b" : "1",
1250             "c" : "1"
1251             },
1252             "4" : {
1253             "e" : "5"
1254             }
1255             }
1256             }
1257             END
1258              
1259              
1260             This is a static method and so should either be imported or invoked as:
1261              
1262             Data::DFA::choice
1263              
1264              
1265             =head2 except(@elements)
1266              
1267             Choice from amongst all symbols except the ones mentioned
1268              
1269             Parameter Description
1270             1 @elements Elements to be chosen from
1271              
1272             B
1273              
1274              
1275             my $dfa = fromExpr # Construct DFA
1276             (zeroOrMore(sequence('a'..'c')),
1277             𝗲𝘅𝗰𝗲𝗽𝘁('b'..'d')
1278             );
1279              
1280             ok $dfa->parser->accepts(qw(a b c a ));
1281             ok !$dfa->parser->accepts(qw(a b c a b));
1282             ok !$dfa->parser->accepts(qw(a b c a c));
1283             ok !$dfa->parser->accepts(qw(a c c a b c));
1284              
1285              
1286             ok $dfa->print(q(Test)) eq <
1287             Test
1288             State Final Symbol Target Final
1289             1 0 a 1 1
1290             2 1 1 b 2
1291             3 2 c 0
1292             END
1293              
1294              
1295             This is a static method and so should either be imported or invoked as:
1296              
1297             Data::DFA::except
1298              
1299              
1300             =head2 fromExpr(@expression)
1301              
1302             Create a DFA parser from a regular B<@expression>.
1303              
1304             Parameter Description
1305             1 @expression Regular expression
1306              
1307             B
1308              
1309              
1310             my $dfa = 𝗳𝗿𝗼𝗺𝗘𝘅𝗽𝗿 # Construct DFA
1311             ("a",
1312             oneOrMore(choice(qw(b c))),
1313             optional("d"),
1314             "e"
1315             );
1316             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1317              
1318             my $dfa = 𝗳𝗿𝗼𝗺𝗘𝘅𝗽𝗿 # Construct DFA
1319             (element("a"),
1320             oneOrMore(choice(element("b"), element("c"))),
1321             optional(element("d")),
1322             element("e")
1323             );
1324             my $parser = $dfa->parser; # New parser
1325              
1326             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1327              
1328             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1329             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1330              
1331             ok !$parser->final; # Not in a final state
1332              
1333             ok $dfa->dumpAsJson eq <
1334             {
1335             "finalStates" : {
1336             "0" : null,
1337             "1" : null,
1338             "2" : null,
1339             "4" : null,
1340             "5" : 1
1341             },
1342             "transitions" : {
1343             "0" : {
1344             "a" : "2"
1345             },
1346             "1" : {
1347             "b" : "1",
1348             "c" : "1",
1349             "d" : "4",
1350             "e" : "5"
1351             },
1352             "2" : {
1353             "b" : "1",
1354             "c" : "1"
1355             },
1356             "4" : {
1357             "e" : "5"
1358             }
1359             }
1360             }
1361             END
1362              
1363              
1364             This is a static method and so should either be imported or invoked as:
1365              
1366             Data::DFA::fromExpr
1367              
1368              
1369             =head2 print($dfa, $title)
1370              
1371             Print the specified B<$dfa> using the specified B<$title>.
1372              
1373             Parameter Description
1374             1 $dfa DFA
1375             2 $title Optional title
1376              
1377             B
1378              
1379              
1380             my $dfa = fromExpr # Construct DFA
1381             (zeroOrMore(sequence('a'..'c')),
1382             except('b'..'d')
1383             );
1384              
1385             ok $dfa->parser->accepts(qw(a b c a ));
1386             ok !$dfa->parser->accepts(qw(a b c a b));
1387             ok !$dfa->parser->accepts(qw(a b c a c));
1388             ok !$dfa->parser->accepts(qw(a c c a b c));
1389              
1390              
1391             ok $dfa->𝗽𝗿𝗶𝗻𝘁(q(Test)) eq <
1392             Test
1393             State Final Symbol Target Final
1394             1 0 a 1 1
1395             2 1 1 b 2
1396             3 2 c 0
1397             END
1398              
1399              
1400             =head2 symbols($dfa)
1401              
1402             Return an array of all the symbols accepted by a B<$dfa>.
1403              
1404             Parameter Description
1405             1 $dfa DFA
1406              
1407             B
1408              
1409              
1410             my $dfa = fromExpr # Construct DFA
1411             ("a",
1412             oneOrMore(choice(qw(b c))),
1413             optional("d"),
1414             "e"
1415             );
1416             is_deeply ['a'..'e'], [$dfa->𝘀𝘆𝗺𝗯𝗼𝗹𝘀]; # List 𝘀𝘆𝗺𝗯𝗼𝗹𝘀
1417              
1418              
1419             =head2 parser($dfa)
1420              
1421             Create a parser from a B<$dfa> constructed from a regular expression.
1422              
1423             Parameter Description
1424             1 $dfa Deterministic finite state automaton generated from an expression
1425              
1426             B
1427              
1428              
1429             my $dfa = fromExpr # Construct DFA
1430             ("a",
1431             oneOrMore(choice(qw(b c))),
1432             optional("d"),
1433             "e"
1434             );
1435             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1436              
1437             my $dfa = fromExpr # Construct DFA
1438             (element("a"),
1439             oneOrMore(choice(element("b"), element("c"))),
1440             optional(element("d")),
1441             element("e")
1442             );
1443             my $𝗽𝗮𝗿𝘀𝗲𝗿 = $dfa->𝗽𝗮𝗿𝘀𝗲𝗿; # New 𝗽𝗮𝗿𝘀𝗲𝗿
1444              
1445             eval { $𝗽𝗮𝗿𝘀𝗲𝗿->accept($_) } for qw(a b a); # Try to parse a b a
1446              
1447             is_deeply [$𝗽𝗮𝗿𝘀𝗲𝗿->next], [qw(b c d e)]; # Next acceptable symbol
1448             is_deeply $𝗽𝗮𝗿𝘀𝗲𝗿->processed, [qw(a b)]; # Symbols processed
1449              
1450             ok !$𝗽𝗮𝗿𝘀𝗲𝗿->final; # Not in a final state
1451              
1452             ok $dfa->dumpAsJson eq <
1453             {
1454             "finalStates" : {
1455             "0" : null,
1456             "1" : null,
1457             "2" : null,
1458             "4" : null,
1459             "5" : 1
1460             },
1461             "transitions" : {
1462             "0" : {
1463             "a" : "2"
1464             },
1465             "1" : {
1466             "b" : "1",
1467             "c" : "1",
1468             "d" : "4",
1469             "e" : "5"
1470             },
1471             "2" : {
1472             "b" : "1",
1473             "c" : "1"
1474             },
1475             "4" : {
1476             "e" : "5"
1477             }
1478             }
1479             }
1480             END
1481              
1482              
1483             =head2 dumpAsJson($dfa)
1484              
1485             Create a JSON string representing a B<$dfa>.
1486              
1487             Parameter Description
1488             1 $dfa Deterministic finite state automaton generated from an expression
1489              
1490             B
1491              
1492              
1493             my $dfa = fromExpr # Construct DFA
1494             ("a",
1495             oneOrMore(choice(qw(b c))),
1496             optional("d"),
1497             "e"
1498             );
1499             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1500              
1501              
1502             =head2 printAsExpr($dfa)
1503              
1504             Print a B<$dfa> as an expression.
1505              
1506             Parameter Description
1507             1 $dfa DFA
1508              
1509             B
1510              
1511              
1512             if (1)
1513             {my $e = q/element(q(a)), zeroOrMore(choice(element(q(b)), element(q(c)))), element(q(d))/;
1514             my $d = eval qq/fromExpr($e)/;
1515             confess $@ if $@;
1516              
1517             my $E = $d->𝗽𝗿𝗶𝗻𝘁𝗔𝘀𝗘𝘅𝗽𝗿;
1518             ok $e eq $E;
1519              
1520             my $R = $d->printAsRe;
1521             ok $R eq q(a (b | c)* d);
1522              
1523             my $D = parseDtdElement(q(a, (b | c)*, d));
1524             my $S = $D->𝗽𝗿𝗶𝗻𝘁𝗔𝘀𝗘𝘅𝗽𝗿;
1525             ok $e eq $S;
1526             }
1527              
1528              
1529             =head2 printAsRe($dfa)
1530              
1531             Print a B<$dfa> as a regular expression.
1532              
1533             Parameter Description
1534             1 $dfa DFA
1535              
1536             B
1537              
1538              
1539             if (1)
1540             {my $e = q/element(q(a)), zeroOrMore(choice(element(q(b)), element(q(c)))), element(q(d))/;
1541             my $d = eval qq/fromExpr($e)/;
1542             confess $@ if $@;
1543              
1544             my $E = $d->printAsExpr;
1545             ok $e eq $E;
1546              
1547             my $R = $d->𝗽𝗿𝗶𝗻𝘁𝗔𝘀𝗥𝗲;
1548             ok $R eq q(a (b | c)* d);
1549              
1550             my $D = parseDtdElement(q(a, (b | c)*, d));
1551             my $S = $D->printAsExpr;
1552             ok $e eq $S;
1553             }
1554              
1555              
1556             =head2 parseDtdElementAST($string)
1557              
1558             Convert the Dtd Element definition in B<$string> to a parse tree.
1559              
1560             Parameter Description
1561             1 $string String representation of DTD element expression
1562              
1563             B
1564              
1565              
1566             if (1)
1567             {is_deeply unbless(𝗽𝗮𝗿𝘀𝗲𝗗𝘁𝗱𝗘𝗹𝗲𝗺𝗲𝗻𝘁𝗔𝗦𝗧(q(a, (b | c)*, d))),
1568             ["sequence",
1569             ["sequence",
1570             ["element", "a"],
1571             ["zeroOrMore", ["choice", ["element", "b"], ["element", "c"]]],
1572             ],
1573             ["element", "d"],
1574             ];
1575             }
1576              
1577              
1578             =head2 parseDtdElement($string)
1579              
1580             Convert the L <>DTD> Element definition in the specified B<$string> to a DFA.
1581              
1582             Parameter Description
1583             1 $string DTD element expression string
1584              
1585             B
1586              
1587              
1588             if (1)
1589             {my $e = q/element(q(a)), zeroOrMore(choice(element(q(b)), element(q(c)))), element(q(d))/;
1590             my $d = eval qq/fromExpr($e)/;
1591             confess $@ if $@;
1592              
1593             my $E = $d->printAsExpr;
1594             ok $e eq $E;
1595              
1596             my $R = $d->printAsRe;
1597             ok $R eq q(a (b | c)* d);
1598              
1599             my $D = 𝗽𝗮𝗿𝘀𝗲𝗗𝘁𝗱𝗘𝗹𝗲𝗺𝗲𝗻𝘁(q(a, (b | c)*, d));
1600             my $S = $D->printAsExpr;
1601             ok $e eq $S;
1602             }
1603              
1604              
1605             =head1 Paths
1606              
1607             Find paths in a DFA.
1608              
1609             =head2 shortPaths($dfa)
1610              
1611             Find a set of paths that reach every state in the DFA with each path terminating in a final state.
1612              
1613             Parameter Description
1614             1 $dfa DFA
1615              
1616             B
1617              
1618              
1619             if (1)
1620             {my $dfa = fromExpr
1621             (zeroOrMore("a"),
1622             oneOrMore("b"),
1623             optional("c"),
1624             "d"
1625             );
1626              
1627             ok !$dfa->parser->accepts(qw());
1628             ok !$dfa->parser->accepts(qw(a));
1629             ok !$dfa->parser->accepts(qw(b));
1630             ok !$dfa->parser->accepts(qw(c));
1631             ok !$dfa->parser->accepts(qw(d));
1632             ok $dfa->parser->accepts(qw(b c d));
1633             ok $dfa->parser->accepts(qw(b d));
1634             ok !$dfa->parser->accepts(qw(b a));
1635             ok $dfa->parser->accepts(qw(b b d));
1636              
1637             is_deeply 𝘀𝗵𝗼𝗿𝘁𝗣𝗮𝘁𝗵𝘀 ($dfa), { "b c d" => ["b", "c", "d"], "b d" => ["b", "d"] };
1638             is_deeply longPaths($dfa),
1639             {"a b b c d" => ["a", "b", "b", "c", "d"],
1640             "a b b d" => ["a", "b", "b", "d"],
1641             "a b c d" => ["a" .. "d"],
1642             "a b d" => ["a", "b", "d"],
1643             "b b c d" => ["b", "b", "c", "d"],
1644             "b b d" => ["b", "b", "d"],
1645             "b c d" => ["b", "c", "d"],
1646             "b d" => ["b", "d"]};
1647             }
1648              
1649              
1650             =head2 longPaths($dfa)
1651              
1652             Find a set of paths that traverse each transition in the DFA with each path terminating in a final state.
1653              
1654             Parameter Description
1655             1 $dfa DFA
1656              
1657             B
1658              
1659              
1660             if (1)
1661             {my $dfa = fromExpr
1662             (zeroOrMore("a"),
1663             oneOrMore("b"),
1664             optional("c"),
1665             "d"
1666             );
1667              
1668             ok !$dfa->parser->accepts(qw());
1669             ok !$dfa->parser->accepts(qw(a));
1670             ok !$dfa->parser->accepts(qw(b));
1671             ok !$dfa->parser->accepts(qw(c));
1672             ok !$dfa->parser->accepts(qw(d));
1673             ok $dfa->parser->accepts(qw(b c d));
1674             ok $dfa->parser->accepts(qw(b d));
1675             ok !$dfa->parser->accepts(qw(b a));
1676             ok $dfa->parser->accepts(qw(b b d));
1677              
1678             is_deeply shortPaths ($dfa), { "b c d" => ["b", "c", "d"], "b d" => ["b", "d"] };
1679             is_deeply 𝗹𝗼𝗻𝗴𝗣𝗮𝘁𝗵𝘀($dfa),
1680             {"a b b c d" => ["a", "b", "b", "c", "d"],
1681             "a b b d" => ["a", "b", "b", "d"],
1682             "a b c d" => ["a" .. "d"],
1683             "a b d" => ["a", "b", "d"],
1684             "b b c d" => ["b", "b", "c", "d"],
1685             "b b d" => ["b", "b", "d"],
1686             "b c d" => ["b", "c", "d"],
1687             "b d" => ["b", "d"]};
1688             }
1689              
1690              
1691             =head2 loops($dfa)
1692              
1693             Find the non repeating loops from each state.
1694              
1695             Parameter Description
1696             1 $dfa DFA
1697              
1698             B
1699              
1700              
1701             if (1)
1702             {my $d = fromExpr choice
1703             oneOrMore "a",
1704             oneOrMore "b",
1705             oneOrMore "c",
1706             oneOrMore "d";
1707              
1708             is_deeply $d->print("(a(b(c(d)+)+)+)+"), <
1709             (a(b(c(d)+)+)+)+
1710             State Final Symbol Target Final
1711             1 0 a 3
1712             2 1 d 2 1
1713             3 2 1 a 3
1714             4 b 4
1715             5 c 1
1716             6 d 2 1
1717             7 3 b 4
1718             8 4 c 1
1719             END
1720              
1721             ok !$d->parser->accepts(qw());
1722             ok !$d->parser->accepts(qw(a b c));
1723             ok $d->parser->accepts(qw(a b c d));
1724             ok $d->parser->accepts(qw(a b c d b c d d));
1725             ok !$d->parser->accepts(qw(a b c b d c d d));
1726             ok !$d->parser->accepts(qw(a b c d a));
1727              
1728             is_deeply $d->𝗹𝗼𝗼𝗽𝘀, {
1729             1 => [["d", "a", "b", "c"], ["d", "b", "c"], ["d", "c"]],
1730             2 => [["a" .. "d"], ["b", "c", "d"], ["c", "d"], ["d"]],
1731             3 => [["b", "c", "d", "a"]],
1732             4 => [["c", "d", "a", "b"], ["c", "d", "b"]]};
1733              
1734             is_deeply shortPaths($d), {"a b c d" => ["a" .. "d"]};
1735             is_deeply longPaths ($d), { "a b c d" => ["a" .. "d"], "a b c d d" => ["a" .. "d", "d"] };
1736              
1737             #say STDERR $d->printAsExpr;
1738             }
1739              
1740              
1741             =head1 Parser methods
1742              
1743             Use the DFA to parse a sequence of symbols
1744              
1745             =head2 Data::DFA::Parser::accept($parser, $symbol)
1746              
1747             Using the specified B<$parser>, accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message that such a move is not possible.
1748              
1749             Parameter Description
1750             1 $parser DFA Parser
1751             2 $symbol Next symbol to be processed by the finite state automaton
1752              
1753             B
1754              
1755              
1756             my $dfa = fromExpr # Construct DFA
1757             (element("a"),
1758             oneOrMore(choice(element("b"), element("c"))),
1759             optional(element("d")),
1760             element("e")
1761             );
1762             my $parser = $dfa->parser; # New parser
1763              
1764             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1765              
1766             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1767             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1768              
1769             ok !$parser->final; # Not in a final state
1770              
1771             ok $dfa->dumpAsJson eq <
1772             {
1773             "finalStates" : {
1774             "0" : null,
1775             "1" : null,
1776             "2" : null,
1777             "4" : null,
1778             "5" : 1
1779             },
1780             "transitions" : {
1781             "0" : {
1782             "a" : "2"
1783             },
1784             "1" : {
1785             "b" : "1",
1786             "c" : "1",
1787             "d" : "4",
1788             "e" : "5"
1789             },
1790             "2" : {
1791             "b" : "1",
1792             "c" : "1"
1793             },
1794             "4" : {
1795             "e" : "5"
1796             }
1797             }
1798             }
1799             END
1800              
1801             my $dfa = fromExpr # Construct DFA
1802             (zeroOrMore(sequence('a'..'c')),
1803             except('b'..'d')
1804             );
1805              
1806             ok $dfa->parser->accepts(qw(a b c a ));
1807             ok !$dfa->parser->accepts(qw(a b c a b));
1808             ok !$dfa->parser->accepts(qw(a b c a c));
1809             ok !$dfa->parser->accepts(qw(a c c a b c));
1810              
1811              
1812             ok $dfa->print(q(Test)) eq <
1813             Test
1814             State Final Symbol Target Final
1815             1 0 a 1 1
1816             2 1 1 b 2
1817             3 2 c 0
1818             END
1819              
1820              
1821             =head2 Data::DFA::Parser::final($parser)
1822              
1823             Returns whether the specified B<$parser> is in a final state or not.
1824              
1825             Parameter Description
1826             1 $parser DFA Parser
1827              
1828             B
1829              
1830              
1831             my $dfa = fromExpr # Construct DFA
1832             (zeroOrMore(sequence('a'..'c')),
1833             except('b'..'d')
1834             );
1835              
1836             ok $dfa->parser->accepts(qw(a b c a ));
1837             ok !$dfa->parser->accepts(qw(a b c a b));
1838             ok !$dfa->parser->accepts(qw(a b c a c));
1839             ok !$dfa->parser->accepts(qw(a c c a b c));
1840              
1841              
1842             ok $dfa->print(q(Test)) eq <
1843             Test
1844             State Final Symbol Target Final
1845             1 0 a 1 1
1846             2 1 1 b 2
1847             3 2 c 0
1848             END
1849              
1850              
1851             =head2 Data::DFA::Parser::next($parser)
1852              
1853             Returns an array of symbols that would be accepted in the current state by the specified B<$parser>.
1854              
1855             Parameter Description
1856             1 $parser DFA Parser
1857              
1858             B
1859              
1860              
1861             my $dfa = fromExpr # Construct DFA
1862             (element("a"),
1863             oneOrMore(choice(element("b"), element("c"))),
1864             optional(element("d")),
1865             element("e")
1866             );
1867             my $parser = $dfa->parser; # New parser
1868              
1869             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1870              
1871             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1872             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1873              
1874             ok !$parser->final; # Not in a final state
1875              
1876             ok $dfa->dumpAsJson eq <
1877             {
1878             "finalStates" : {
1879             "0" : null,
1880             "1" : null,
1881             "2" : null,
1882             "4" : null,
1883             "5" : 1
1884             },
1885             "transitions" : {
1886             "0" : {
1887             "a" : "2"
1888             },
1889             "1" : {
1890             "b" : "1",
1891             "c" : "1",
1892             "d" : "4",
1893             "e" : "5"
1894             },
1895             "2" : {
1896             "b" : "1",
1897             "c" : "1"
1898             },
1899             "4" : {
1900             "e" : "5"
1901             }
1902             }
1903             }
1904             END
1905              
1906              
1907             =head2 Data::DFA::Parser::accepts($parser, @symbols)
1908              
1909             Confirm that the specified B<$parser> accepts an array representing a sequence of symbols.
1910              
1911             Parameter Description
1912             1 $parser DFA Parser
1913             2 @symbols Array of symbols
1914              
1915             B
1916              
1917              
1918             my $dfa = fromExpr # Construct DFA
1919             ("a",
1920             oneOrMore(choice(qw(b c))),
1921             optional("d"),
1922             "e"
1923             );
1924             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1925              
1926             my $dfa = fromExpr # Construct DFA
1927             (element("a"),
1928             oneOrMore(choice(element("b"), element("c"))),
1929             optional(element("d")),
1930             element("e")
1931             );
1932             my $parser = $dfa->parser; # New parser
1933              
1934             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1935              
1936             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1937             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1938              
1939             ok !$parser->final; # Not in a final state
1940              
1941             ok $dfa->dumpAsJson eq <
1942             {
1943             "finalStates" : {
1944             "0" : null,
1945             "1" : null,
1946             "2" : null,
1947             "4" : null,
1948             "5" : 1
1949             },
1950             "transitions" : {
1951             "0" : {
1952             "a" : "2"
1953             },
1954             "1" : {
1955             "b" : "1",
1956             "c" : "1",
1957             "d" : "4",
1958             "e" : "5"
1959             },
1960             "2" : {
1961             "b" : "1",
1962             "c" : "1"
1963             },
1964             "4" : {
1965             "e" : "5"
1966             }
1967             }
1968             }
1969             END
1970              
1971              
1972             =head1 Data Structures
1973              
1974             Data structures used by this package.
1975              
1976              
1977             =head2 Data::DFA Definition
1978              
1979              
1980             DFA State
1981              
1982              
1983              
1984              
1985             =head3 Output fields
1986              
1987              
1988             B - Whether this state is final
1989              
1990             B - Hash whose keys are the NFA states that contributed to this super state
1991              
1992             B - Pumping lemmas for this state
1993              
1994             B - Sequence of states to final state minus pumped states
1995              
1996             B - Name of the state - the join of the NFA keys
1997              
1998             B - Transitions from this state
1999              
2000              
2001              
2002             =head2 Data::DFA::Parser Definition
2003              
2004              
2005             Parse a sequence of symbols with a DFA
2006              
2007              
2008              
2009              
2010             =head3 Output fields
2011              
2012              
2013             B - DFA being used
2014              
2015             B - Symbol on which we failed
2016              
2017             B - Symbols processed
2018              
2019             B - Current state
2020              
2021              
2022              
2023             =head2 Data::DFA::State Definition
2024              
2025              
2026             DFA State
2027              
2028              
2029              
2030              
2031             =head3 Output fields
2032              
2033              
2034             B - Whether this state is final
2035              
2036             B - Hash whose keys are the NFA states that contributed to this super state
2037              
2038             B - Pumping lemmas for this state
2039              
2040             B - Sequence of states to final state minus pumped states
2041              
2042             B - Name of the state - the join of the NFA keys
2043              
2044             B - Transitions from this state
2045              
2046              
2047              
2048             =head1 Private Methods
2049              
2050             =head2 newDFA()
2051              
2052             Create a new DFA.
2053              
2054              
2055             =head2 newState(%options)
2056              
2057             Create a new DFA state with the specified options.
2058              
2059             Parameter Description
2060             1 %options DFA state as hash
2061              
2062             =head2 fromNfa($nfa)
2063              
2064             Create a DFA parser from an NFA.
2065              
2066             Parameter Description
2067             1 $nfa Nfa
2068              
2069             =head2 finalState($nfa, $reach)
2070              
2071             Check whether, in the specified B<$nfa>, any of the states named in the hash reference B<$reach> are final. Final states that refer to reduce rules are checked for reduce conflicts.
2072              
2073             Parameter Description
2074             1 $nfa NFA
2075             2 $reach Hash of states in the NFA
2076              
2077             =head2 superState($dfa, $superStateName, $nfa, $symbols, $nfaSymbolTransitions)
2078              
2079             Create super states from existing superstate.
2080              
2081             Parameter Description
2082             1 $dfa DFA
2083             2 $superStateName Start state in DFA
2084             3 $nfa NFA we are converting
2085             4 $symbols Symbols in the NFA we are converting
2086             5 $nfaSymbolTransitions States reachable from each state by symbol
2087              
2088             =head2 superStates($dfa, $SuperStateName, $nfa)
2089              
2090             Create super states from existing superstate.
2091              
2092             Parameter Description
2093             1 $dfa DFA
2094             2 $SuperStateName Start state in DFA
2095             3 $nfa NFA we are tracking
2096              
2097             =head2 transitionOnSymbol($dfa, $superStateName, $symbol)
2098              
2099             The super state reached by transition on a symbol from a specified state.
2100              
2101             Parameter Description
2102             1 $dfa DFA
2103             2 $superStateName Start state in DFA
2104             3 $symbol Symbol
2105              
2106             =head2 renumberDfa($dfa, $initialStateName)
2107              
2108             Renumber the states in the specified B<$dfa>.
2109              
2110             Parameter Description
2111             1 $dfa DFA
2112             2 $initialStateName Initial super state name
2113              
2114             B
2115              
2116              
2117             my $dfa = fromExpr # Construct DFA
2118             (zeroOrMore(sequence('a'..'c')),
2119             except('b'..'d')
2120             );
2121              
2122             ok $dfa->parser->accepts(qw(a b c a ));
2123             ok !$dfa->parser->accepts(qw(a b c a b));
2124             ok !$dfa->parser->accepts(qw(a b c a c));
2125             ok !$dfa->parser->accepts(qw(a c c a b c));
2126              
2127              
2128             ok $dfa->print(q(Test)) eq <
2129             Test
2130             State Final Symbol Target Final
2131             1 0 a 1 1
2132             2 1 1 b 2
2133             3 2 c 0
2134             END
2135              
2136              
2137             =head2 printFinal($final)
2138              
2139             Print a final state
2140              
2141             Parameter Description
2142             1 $final Final State
2143              
2144             =head2 removeDuplicatedStates($dfa)
2145              
2146             Remove duplicated states in a B<$dfa>.
2147              
2148             Parameter Description
2149             1 $dfa Deterministic finite state automaton generated from an expression
2150              
2151             =head2 removeUnreachableStates($dfa)
2152              
2153             Remove unreachable states in a B<$dfa>.
2154              
2155             Parameter Description
2156             1 $dfa Deterministic finite state automaton generated from an expression
2157              
2158             =head2 printAsExpr2($dfa, %options)
2159              
2160             Print a DFA B<$dfa_> in an expression form determined by the specified B<%options>.
2161              
2162             Parameter Description
2163             1 $dfa Dfa
2164             2 %options Options.
2165              
2166             =head2 subArray($A, $B)
2167              
2168             Whether the second array is contained within the first.
2169              
2170             Parameter Description
2171             1 $A Exterior array
2172             2 $B Interior array
2173              
2174             =head2 removeLongerPathsThatContainShorterPaths($paths)
2175              
2176             Remove longer paths that contain shorter paths.
2177              
2178             Parameter Description
2179             1 $paths Paths
2180              
2181              
2182             =head1 Index
2183              
2184              
2185             1 L - Choice from amongst one or more elements.
2186              
2187             2 L - Using the specified B<$parser>, accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message that such a move is not possible.
2188              
2189             3 L - Confirm that the specified B<$parser> accepts an array representing a sequence of symbols.
2190              
2191             4 L - Returns whether the specified B<$parser> is in a final state or not.
2192              
2193             5 L - Returns an array of symbols that would be accepted in the current state by the specified B<$parser>.
2194              
2195             6 L - Create a JSON string representing a B<$dfa>.
2196              
2197             7 L - One element.
2198              
2199             8 L - Choice from amongst all symbols except the ones mentioned
2200              
2201             9 L - Check whether, in the specified B<$nfa>, any of the states named in the hash reference B<$reach> are final.
2202              
2203             10 L - Create a DFA parser from a regular B<@expression>.
2204              
2205             11 L - Create a DFA parser from an NFA.
2206              
2207             12 L - Find a set of paths that traverse each transition in the DFA with each path terminating in a final state.
2208              
2209             13 L - Find the non repeating loops from each state.
2210              
2211             14 L - Create a new DFA.
2212              
2213             15 L - Create a new DFA state with the specified options.
2214              
2215             16 L - One or more repetitions of a sequence of elements.
2216              
2217             17 L - An optional sequence of element.
2218              
2219             18 L - Convert the L <>DTD> Element definition in the specified B<$string> to a DFA.
2220              
2221             19 L - Convert the Dtd Element definition in B<$string> to a parse tree.
2222              
2223             20 L - Create a parser from a B<$dfa> constructed from a regular expression.
2224              
2225             21 L - Print the specified B<$dfa> using the specified B<$title>.
2226              
2227             22 L - Print a B<$dfa> as an expression.
2228              
2229             23 L - Print a DFA B<$dfa_> in an expression form determined by the specified B<%options>.
2230              
2231             24 L - Print a B<$dfa> as a regular expression.
2232              
2233             25 L - Print a final state
2234              
2235             26 L - Remove duplicated states in a B<$dfa>.
2236              
2237             27 L - Remove longer paths that contain shorter paths.
2238              
2239             28 L - Remove unreachable states in a B<$dfa>.
2240              
2241             29 L - Renumber the states in the specified B<$dfa>.
2242              
2243             30 L - Sequence of elements.
2244              
2245             31 L - Find a set of paths that reach every state in the DFA with each path terminating in a final state.
2246              
2247             32 L - Whether the second array is contained within the first.
2248              
2249             33 L - Create super states from existing superstate.
2250              
2251             34 L - Create super states from existing superstate.
2252              
2253             35 L - Return an array of all the symbols accepted by a B<$dfa>.
2254              
2255             36 L - The super state reached by transition on a symbol from a specified state.
2256              
2257             37 L - Zero or more repetitions of a sequence of elements.
2258              
2259             =head1 Installation
2260              
2261             This module is written in 100% Pure Perl and, thus, it is easy to read,
2262             comprehend, use, modify and install via B:
2263              
2264             sudo cpan install Data::DFA
2265              
2266             =head1 Author
2267              
2268             L
2269              
2270             L
2271              
2272             =head1 Copyright
2273              
2274             Copyright (c) 2016-2019 Philip R Brenan.
2275              
2276             This module is free software. It may be used, redistributed and/or modified
2277             under the same terms as Perl itself.
2278              
2279             =cut
2280              
2281              
2282              
2283             # Tests and documentation
2284              
2285             sub test
2286 1     1 0 12 {my $p = __PACKAGE__;
2287 1         10 binmode($_, ":utf8") for *STDOUT, *STDERR;
2288 1 50       67 return if eval "eof(${p}::DATA)";
2289 1         59 my $s = eval "join('', <${p}::DATA>)";
2290 1 50       10 $@ and die $@;
2291 1     1   7 eval $s;
  1     1   1  
  1     1   39  
  1     1   6  
  1         2  
  1         30  
  1         4  
  1         2  
  1         9  
  1         836  
  1         68020  
  1         10  
  1         90  
2292 1 50       10 $@ and die $@;
2293 1         144 1
2294             }
2295              
2296             test unless caller;
2297              
2298             1;
2299             # podDocumentation
2300             __DATA__