File Coverage

blib/lib/Data/DFA.pm
Criterion Covered Total %
statement 362 383 94.5
branch 70 96 72.9
condition 10 18 55.5
subroutine 69 71 97.1
pod 30 31 96.7
total 541 599 90.3


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -I/home/phil/perl/cpan/DataNFA/lib/
2             #-------------------------------------------------------------------------------
3             # Deterministic finite state parser from a regular expression.
4             # Philip R Brenan at gmail dot com, Appa Apps Ltd Inc., 2018-2020
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             package Data::DFA;
8             our $VERSION = 20201030;
9             require v5.26;
10 1     1   908 use warnings FATAL => qw(all);
  1         8  
  1         39  
11 1     1   5 use strict;
  1         2  
  1         32  
12 1     1   7 use Carp qw(confess);
  1         2  
  1         76  
13 1     1   519 use Data::Dump qw(dump);
  1         7504  
  1         59  
14 1     1   668 use Data::NFA;
  1         146872  
  1         55  
15 1     1   12 use Data::Table::Text qw(:all);
  1         2  
  1         1368  
16 1     1   8 use feature qw(current_sub say);
  1         2  
  1         4832  
17              
18             # dfa: {state=>state name, transitions=>{symbol=>state}, final state=>{reduction rule=>1}, pumps=>[[pumping lemmas]]}
19              
20             my $logFile = q(/home/phil/z/z/z/zzz.txt); # Log printed results if developing
21              
22             #D1 Construct regular expression # Construct a regular expression that defines the language to be parsed using the following combining operations:
23              
24             sub element($) #S One element.
25 54     54 1 302 {my ($label) = @_; # Transition symbol
26 54         125 &Data::NFA::element(@_);
27             }
28              
29             sub sequence(@) #S Sequence of elements.
30 20     20 1 124 {my (@elements) = @_; # Elements
31 20         56 &Data::NFA::sequence(@_);
32             }
33              
34             sub optional(@) #S An optional sequence of element.
35 6     6 1 62 {my (@element) = @_; # Elements
36 6         24 &Data::NFA::optional(@_);
37             }
38              
39             sub zeroOrMore(@) #S Zero or more repetitions of a sequence of elements.
40 30     30 1 194 {my (@element) = @_; # Elements
41 30         97 &Data::NFA::zeroOrMore(@_);
42             }
43              
44             sub oneOrMore(@) #S One or more repetitions of a sequence of elements.
45 33     33 1 255 {my (@element) = @_; # Elements
46 33         110 &Data::NFA::oneOrMore(@_);
47             }
48              
49             sub choice(@) #S Choice from amongst one or more elements.
50 22     22 1 158 {my (@elements) = @_; # Elements to be chosen from
51 22         76 &Data::NFA::choice(@_);
52             }
53              
54             sub except(@) #S Choice from amongst all symbols except the ones mentioned
55 1     1 1 13 {my (@elements) = @_; # Elements to be chosen from
56 1         8 &Data::NFA::except(@_);
57             }
58              
59             #1 Deterministic finite state parser # Create a deterministic finite state automaton to parse sequences of symbols in the language defined by a regular expression.
60              
61             sub newDFA #P Create a new DFA.
62 136     136 1 396 {genHash(q(Data::DFA)); # DFA State
63             }
64              
65             sub newState(%) #P Create a new DFA state with the specified options.
66 495     495 1 1477 {my (%options) = @_; # DFA state as hash
67              
68 495         1144 my $r = genHash(q(Data::DFA::State), # DFA State
69             state => undef, # Name of the state - the join of the NFA keys
70             nfaStates => undef, # Hash whose keys are the NFA states that contributed to this super state
71             transitions => undef, # Transitions from this state
72             final => undef, # Whether this state is final
73             pump => undef, # Pumping lemmas for this state
74             sequence => undef, # Sequence of states to final state minus pumped states
75             );
76              
77 495         37355 %$r = (%$r, %options);
78              
79 495         1454 $r
80             }
81              
82             sub fromNfa($) #P Create a DFA parser from an NFA.
83 38     38 1 181536 {my ($nfa) = @_; # Nfa
84              
85 38         114 my $dfa = newDFA; # A DFA is a hash of states
86              
87 38         473 my @nfaStates = (0, $nfa->statesReachableViaJumps(0)->@*); # Nfa states reachable from the start state
88 38         8237 my $initialSuperState = join ' ', sort @nfaStates; # Initial super state
89              
90             $$dfa{$initialSuperState} = newState( # Start state
91             state => $initialSuperState, # Name of the state - the join of the NFA keys
92 222         429 nfaStates => {map{$_=>1} @nfaStates}, # Hash whose keys are the NFA states that contributed to this super state
93 38         119 final => finalState($nfa, {map {$_=>1} @nfaStates}), # Whether this state is final
  222         414  
94             );
95              
96 38         217 $dfa->superStates($initialSuperState, $nfa); # Create DFA superstates from states reachable from the start state
97              
98 38         127 my $r = $dfa->renumberDfa($initialSuperState); # Renumber
99 38         141 my $d = $r->removeDuplicatedStates; # Remove duplicate states
100 38         145 my $u = $d->removeUnreachableStates; # Remove unreachable states
101 38         93 my $R = $u->renumberDfa(0); # Renumber again
102              
103 38         1268 $R # Resulting Dfa
104             }
105              
106             sub fromExpr(@) #S Create a DFA parser from a regular B<@expression>.
107 37     37 1 258 {my (@expression) = @_; # Regular expression
108 37         118 fromNfa(Data::NFA::fromExpr(@expression))
109             }
110              
111             sub finalState($$) #P Check whether, in the specified B<$nfa>, any of the states named in the hash reference B<$reach> are final. Final states that refer to reduce rules are checked for reduce conflicts.
112 152     152 1 276 {my ($nfa, $reach) = @_; # NFA, hash of states in the NFA
113 152         233 my @final; # Reduction rule
114              
115 152         492 for my $state(sort keys %$reach) # Each state we can reach
116 947 100       16194 {if (my $f = $nfa->isFinal($state))
117 219         5507 {push @final, $f;
118             }
119             }
120              
121 152 100       2821 @final ? \@final : undef; # undef if not final else reduction rules
122             }
123              
124             sub superState($$$$$) #P Create super states from existing superstate.
125 152     152 1 334 {my ($dfa, $superStateName, $nfa, $symbols, $nfaSymbolTransitions) = @_; # DFA, start state in DFA, NFA we are converting, symbols in the NFA we are converting, states reachable from each state by symbol
126 152         272 my $superState = $$dfa{$superStateName};
127 152         2643 my $nfaStates = $superState->nfaStates;
128              
129 152         641 my @created; # New super states created
130 152         273 for my $symbol(@$symbols) # Each symbol
131 496         1361 {my $reach = {}; # States in NFS reachable from start state in dfa
132 496         1704 for my $nfaState(sort keys %$nfaStates) # Each NFA state in the dfa start state
133 3113 50       5279 {if (my $r = $$nfaSymbolTransitions{$nfaState}{$symbol}) # States in the NFA reachable on the symbol
134 3113         4179 {map{$$reach{$_}++} @$r; # Accumulate NFA reachable NFA states
  7349         10449  
135             }
136             }
137              
138 496 100       1345 if (keys %$reach) # Current symbol takes us somewhere
139 229         1099 {my $newSuperStateName = join ' ', sort keys %$reach; # Name of the super state reached from the start state via the current symbol
140 229 100       614 if (!$$dfa{$newSuperStateName}) # Super state does not exists so create it
141 114         268 {my $newState = $$dfa{$newSuperStateName} = newState
142             (nfaStates => $reach,
143             transitions => undef,
144             final => finalState($nfa, $reach),
145             );
146 114         264 push @created, $newSuperStateName; # Find all its transitions
147             }
148 229         4285 $$dfa{$superStateName}->transitions->{$symbol} = $newSuperStateName;
149             }
150             }
151              
152             @created
153 152         1282 }
154              
155             sub superStates($$$) #P Create super states from existing superstate.
156 38     38 1 103 {my ($dfa, $SuperStateName, $nfa) = @_; # DFA, start state in DFA, NFA we are tracking
157 38         136 my $symbols = [$nfa->symbols]; # Symbols in nfa
158 38         12244 my $transitions = $nfa->allTransitions; # Precompute transitions in the NFA
159              
160 38         327086 my @fix = ($SuperStateName);
161 38         151 while(@fix) # Create each superstate as the set of all nfa states we could be in after each transition on a symbol
162 152         424 {push @fix, superState($dfa, pop @fix, $nfa, $symbols, $transitions);
163             }
164             }
165              
166             sub transitionOnSymbol($$$) #P The super state reached by transition on a symbol from a specified state.
167 75     75 1 141 {my ($dfa, $superStateName, $symbol) = @_; # DFA, start state in DFA, symbol
168 75         107 my $superState = $$dfa{$superStateName};
169 75         1188 my $transitions = $superState->transitions;
170              
171 75         342 $$transitions{$symbol}
172             } # transitionOnSymbol
173              
174             sub renumberDfa($$) #P Renumber the states in the specified B<$dfa>.
175 98     98 1 212 {my ($dfa, $initialStateName) = @_; # DFA, initial super state name
176 98         144 my %rename;
177 98         194 my $cfa = newDFA;
178              
179 98         1055 $rename{$initialStateName} = 0; # The start state is always 0 in the dfa
180 98         321 for my $s(sort keys %$dfa) # Each state
181 343 100       784 {$rename{$s} = keys %rename if !exists $rename{$s}; # Rename state
182             }
183              
184 98         377 for my $superStateName(sort keys %$dfa) # Each state
185 343         1676 {my $sourceState = $rename{$superStateName};
186 343         569 my $s = $$cfa{$sourceState} = newState;
187 343         549 my $superState = $$dfa{$superStateName};
188              
189 343         6327 my $transitions = $superState->transitions;
190 343         1886 for my $symbol(sort keys %$transitions) # Rename the target of every transition
191 484         8659 {$s->transitions->{$symbol} = $rename{$$transitions{$symbol}};
192             }
193              
194 343         6246 $s->final = $superState->final;
195             }
196              
197             $cfa
198 98         692 } # renumberDfa
199              
200             sub univalent($) # Check that the L is univalent: a univalent L has a mapping from symbols to states. Returns a hash showing the mapping from symbols to states if the L is univalent, else returns B.
201 5     5 1 13 {my ($dfa) = @_; # Dfa to check
202              
203 5         12 my %symbols; # Symbol to states
204 5         26 for my $state(sort keys %$dfa) # Each state name
205 19         319 {my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
206 19         91 for my $symbol(sort keys %$transitions) # Each transition from the state being checked
207 25         40 {my $target = $$transitions{$symbol}; # New state reached by transition from state being checked
208 25         60 $symbols{$symbol}{$target}++; # Count targets for symbol
209             }
210             }
211              
212 5         13 my @multiple; my %single; # Symbols with multiple targets, single targets
213 5         15 for my $symbol(sort keys %symbols) # Symbols
214 12         34 {my @states = sort keys $symbols{$symbol}->%*; # States
215 12 100       28 if (@states == 1) # Univalent
216 10         25 {($single{$symbol}) = @states; # Mapping
217             }
218             else # Multivalent
219 2         7 {push @multiple, $symbol
220             }
221             }
222              
223 5 50 33     71 dumpFile($logFile, \%single) if -e $logFile and !@multiple; # Log the result if requested
224              
225 5 100       57 @multiple ? undef : \%single # Only return the mapping if it is valid
226             } # univalent
227              
228             #D1 Print # Pritn the Dfa in various ways.
229              
230             sub printFinal($) #P Print a final state
231 77     77 1 127 {my ($final) = @_; # final State
232 77         106 my %f;
233 77         157 for my $f(@$final)
234 126 50       283 {$f{ref($f) ? $f->print->($f) : $f}++;
235             }
236 77         366 join ' ', sort keys %f;
237             }
238              
239             sub print($;$) # Print the specified B<$dfa> using the specified B<$title>.
240 15     15 1 51 {my ($dfa, $title) = @_; # DFA, optional title
241              
242 15         36 my @out;
243 15         79 for my $superStateName(sort {$a <=> $b} keys %$dfa) # Each state
  71         129  
244 52         97 {my $superState = $$dfa{$superStateName};
245 52         883 my $transitions = $superState->transitions;
246 52         915 my $Final = $superState->final;
247 52 100       335 if (my @s = sort keys %$transitions) # Transitions present
248 50         86 {my $s = $s[0];
249 50         134 my $S = $dfa->transitionOnSymbol($superStateName, $s);
250 50         835 my $final = $$dfa{$S}->final;
251 50 100       268 push @out, [$superStateName, $Final ? 1 : q(), $s, $$transitions{$s},
252             printFinal($final)];
253 50         177 for(1..$#s)
254 25         58 {my $s = $s[$_];
255 25         50 my $S = $dfa->transitionOnSymbol($superStateName, $s);
256 25         408 my $final = $$dfa{$S}->final;
257 25         117 push @out, ['', '', $s, $$transitions{$s}, printFinal($final)];
258             }
259             }
260             else # No transitions present
261 2 50       11 {push @out, [$superStateName, $Final ? 1 : q(), q(), q(), printFinal($Final)];
262             }
263             }
264              
265             my $r = sub # Format results as a table
266 15 50   15   52 {if (@out)
267 15         99 {my $t = formatTable([@out], [qw(State Final Symbol Target Final)])."\n";
268 15 100       11713 my $s = $title ? "$title\n$t" : $t;
269 15         424 $s =~ s(\s*\Z) ()gs;
270 15         395 $s =~ s(\s*\n) (\n)gs;
271 15         61 return "$s\n";
272             }
273 0         0 "$title: No states in Dfa";
274 15         114 }->();
275              
276 15 50       446 owf($logFile, $r) if -e $logFile; # Log the result if requested
277 15         176 $r # Return the result
278             }
279              
280             sub symbols($) # Return an array of all the symbols accepted by a B<$dfa>.
281 2     2 1 6 {my ($dfa) = @_; # DFA
282 2         6 my %symbols;
283 2         8 for my $superState(values %$dfa) # Each state
284 10         160 {my $transitions = $superState->transitions;
285 10         69 $symbols{$_}++ for sort keys %$transitions; # Symbol for each transition
286             }
287              
288 2         39 sort keys %symbols;
289             }
290              
291             sub parser($;$) # Create a parser from a B<$dfa> constructed from a regular expression.
292 104     104 1 280 {my ($dfa, $observer) = @_; # Deterministic finite state automaton, optional observer
293 104         420 return genHash(q(Data::DFA::Parser), # Parse a sequence of symbols with a DFA
294             dfa => $dfa, # DFA being used
295             state => 0, # Current state
296             fail => undef, # Symbol on which we failed
297             observer => $observer, # Optional sub($parser, $symbol, $target) to observe transitions.
298             processed => [], # Symbols processed
299             );
300             }
301              
302             sub dumpAsJson($) # Create a JSON string representing a B<$dfa>.
303 0     0 1 0 {my ($dfa) = @_; # Deterministic finite state automaton generated from an expression
304 0         0 my $jfa;
305 0         0 for my $state(sort keys %$dfa) # Each state
306 0         0 {my $transitions = $$dfa{$state}->transitions; # Transitions
307 0         0 for my $t(sort keys %$transitions)
308 0         0 {$$jfa{transitions}{$state}{$t} = $$transitions{$t}; # Clone transitions
309             }
310 0         0 $$jfa{finalStates}{$state} = $$dfa{$state}->final; # Final states
311             }
312 0         0 encodeJson($jfa);
313             }
314              
315             sub removeDuplicatedStates($) #P Remove duplicated states in a B<$dfa>.
316 38     38 1 95 {my ($dfa) = @_; # Deterministic finite state automaton generated from an expression
317 38         52 my $deleted; # Deleted state count
318              
319 38         117 for(1..100) # Keep squeezing out duplicates
320 60         94 {my %d;
321 60         220 for my $state(sort keys %$dfa) # Each state
322 221         457 {my $s = $$dfa{$state}; # State
323             # my $c = dump([$s->transitions, $s->final]); # State content
324 221 100       4485 my $c = dump([$s->transitions, $s->final ? 1 : 0]); # State content - we only need to know if the state is final or not
325 221         74242 push $d{$c}->@*, $state;
326             }
327              
328 60         130 my %m; # Map deleted duplicated states back to undeleted original
329 60         145 for my $d(values %d) # Delete unitary states
330 191         355 {my ($b, @d) = $d->@*;
331 191 100       410 if (@d)
332 23         61 {for my $r(@d) # Map duplicated states to base unduplicated state
333 30         67 {$m{$r} = $b; # Map
334 30         147 delete $$dfa{$r}; # Remove duplicated state from DFA
335 30         75 ++$deleted;
336             }
337             }
338             }
339              
340 60 100       152 if (keys %m) # Remove duplicate states
341 22         54 {for my $state(values %$dfa) # Each state
342 69         1230 {my $transitions = $state->transitions;
343 69         394 for my $symbol(sort keys %$transitions)
344 95         161 {my $s = $$transitions{$symbol};
345 95 100       252 if (defined $m{$s})
346 33         111 {$$transitions{$symbol} = $m{$s};
347             }
348             }
349             }
350             }
351 38         121 else {last};
352             }
353              
354 38 100       125 $deleted ? renumberDfa($dfa, 0) : $dfa; # Renumber states if necessary
355             } # removeDuplicatedStates
356              
357             sub removeUnreachableStates($) #P Remove unreachable states in a B<$dfa>.
358 38     38 1 99 {my ($dfa) = @_; # Deterministic finite state automaton generated from an expression
359 38         69 my $deleted = 0; # Count of deleted unreachable states
360 38         112 my %reachable; # States reachable from the start state
361             my %checked; # States that have been checked
362 38         0 my @check; # States to check
363              
364 38         134 my ($startState) = sort keys %$dfa; # Start state name
365 38         99 $reachable{$startState}++; # Mark start state as reachable
366 38         61 $checked{$startState}++; # Mark start state as checked
367 38         88 push @check, $startState; # Check start state
368              
369 38         109 while(@check) # Check each state reachable from the start state
370 122         245 {my $state = pop @check; # State to check
371 122         2092 for my $s(sort keys $$dfa{$state}->transitions->%*) # Target each transition from the state
372 160         3064 {my $t = $$dfa{$state}->transitions->{$s}; # Target state
373 160         679 $reachable{$t}++; # Mark target as reachable
374 160 100       557 push @check, $t unless $checked{$t}++; # Check states reachable from the target state unless already checked
375             }
376             }
377              
378 38         165 for my $s(sort keys %$dfa) # Each state
379 122 50       238 {if (!$reachable{$s}) # Unreachable state
380 0         0 {++$deleted; # Count unreachable states
381 0         0 delete $$dfa{$s}; # Remove unreachable state
382             }
383             }
384 38 50       92 my $r = $deleted ? renumberDfa($dfa, 0) : $dfa; # Renumber states if necessary
385 38         111 $r
386             }
387              
388             # Trace from the start node through to each node remembering the long path. If
389             # we reach a node we have already visited then add a pumping lemma to it as the
390             # long path minus the short path used to reach the node the first time.
391             #
392             # Trace from start node to final states without revisiting nodes. Write the
393             # expressions so obtained as a choice of sequences with pumping lemmas.
394              
395             sub printAsExpr2($%) #P Print a DFA B<$dfa_> in an expression form determined by the specified B<%options>.
396             {my ($dfa, %options) = @_; # Dfa, options.
397              
398             checkKeys(\%options, # Check options
399             {element => q(Format a single element),
400             choice => q(Format a choice of expressions),
401             sequence => q(Format a sequence of expressions),
402             zeroOrMore => q(Format zero or more instances of an expression),
403             oneOrMore => q(One or more instances of an expression),
404             });
405              
406             my ($fe, $fc, $fs, $fz, $fo) = @options{qw(element choice sequence zeroOrMore oneOrMore)};
407              
408             my %pumped; # States pumped => [symbols along pump]
409             my $pump; $pump = sub # Create pumping lemmas for each state
410 51     51   121 {my ($state, @path) = @_; # Current state, path to state
411 51 100       127 if (defined $pumped{$state}) # State already visited
412 17         70 {my @pump = @path[$pumped{$state}..$#path]; # Long path minus short path
413 17 50       320 push $$dfa{$state}->pump->@*, [@pump] if @pump; # Add the pumping lemma
414             }
415             else # State not visited
416 34         602 {my $transitions = $$dfa{$state}->transitions; # Transitions hash
417 34         151 $pumped{$state} = @path; # Record visit to this state
418 34         118 for my $t(sort keys %$transitions) # Visit each adjacent states
419 37         178 {&$pump($$transitions{$t}, @path, $t); # Visit adjacent state
420             }
421             }
422             };
423              
424             &$pump(0); # Find the pumping lemmas for each node
425              
426             my %visited; # States visited => [symbols along path to state]
427             my $visit; $visit = sub # Find non pumped paths
428 51     51   108 {my ($state, @path) = @_; # Current state, sequence to get here
429 51 100 100     716 if (@path and $$dfa{$state}->final) # Non empty path leads to final state
430 20         405 {push $$dfa{$state}->sequence->@*, [@path] # Record path as a sequence that leads to a final state
431             }
432 51 100       298 if (!defined $visited{$state}) # States not yet visited
433 34         557 {my $transitions = $$dfa{$state}->transitions; # Transitions hash
434 34         157 $visited{$state} = [@path];
435 34         107 for my $symbol(sort keys %$transitions) # Visit each adjacent states
436 37         60 {my $s = $$transitions{$symbol}; # Adjacent state
437 37         180 &$visit($$transitions{$symbol}, @path, [$state, $symbol, $s]); # Visit adjacent state
438             }
439 34         88 delete $visited{$state};
440             }
441             };
442              
443             &$visit(0); # Find unpumped paths
444              
445             my @choices; # Construct regular expression as choices amongst pumped paths
446             for my $s(sort keys %$dfa) # Each state name
447             {my $state = $$dfa{$s}; # Each state
448              
449             if ($state->final) # Final state
450             {for my $path($state->sequence->@*) # Each path to this state
451             {my @seq;
452              
453             for my $step(@$path) # Current state, sequence to get here
454             {my ($from, $symbol, $to) = @$step; # States not yet visited
455             push @seq, &$fe($symbol) unless $from == $to; # Add element unless looping in a final state
456              
457             if (my $pump = $$dfa{$to}->pump) # Add pumping lemmas
458             {my @c;
459             for my $p(@$pump) # Add each pumping lemma for this state
460             {if (@$p == 1) # Format one element
461             {push @c, &$fe($$p[0]);
462             }
463             else # Sequence of several elements
464             {push @c, &$fs(map {&$fe($_)} @$p);
465             }
466             }
467              
468             my sub insertPump($) # Insert a pumping path
469             {my ($c) = @_; # Pumping path, choice
470             my $z = &$fz($c);
471             if (@seq and $seq[-1] eq $c) # Sequence plus zero or more of same sequence is one or more of sequence
472             {$seq[-1] = &$fo($c)
473             }
474             elsif (!@seq or $seq[-1] ne $z && $seq[-1] ne &$fo($c)) # Suppress multiple zero or more or one and more followed by zero or more of the same item
475             {push @seq, $z
476             }
477             }
478              
479             if (@c == 1) {insertPump $c[0]} # Only one pumping lemma
480             else {insertPump &$fc(@c)} # Multiple pumping lemmas
481             }
482             }
483             push @choices, &$fs(@seq); # Combine choice of sequences from start state
484             }
485             }
486             };
487              
488             my sub debracket($) # Remove duplicated outer brackets
489             {my ($re) = @_; # Re
490             while(length($re) > 1 and substr($re, 0, 1) eq '(' and
491             substr($re, -1, 1) eq ')')
492             {$re = substr($re, 1, -1)
493             }
494             $re
495             }
496              
497             return debracket $choices[0] if @choices == 1; # No wrapping needed if only one choice
498             debracket &$fc(map {&$fs($_)} @choices)
499             } # printAsExpr2
500              
501             sub printAsExpr($) # Print a B<$dfa> as an expression.
502 4     4 1 13 {my ($dfa) = @_; # DFA
503              
504             my %options = # Formatting methods
505             (element => sub
506 14     14   25 {my ($e) = @_;
507 14         44 qq/element(q($e))/
508             },
509             choice => sub
510 2     2   8 {my $c = join ', ', @_;
511 2         7 qq/choice($c)/
512             },
513             sequence => sub
514 4     4   17 {my $s = join ', ', @_;
515 4         18 qq/sequence($s)/
516             },
517             zeroOrMore => sub
518 4     4   10 {my ($z) = @_;
519 4         15 qq/zeroOrMore($z)/
520             },
521             oneOrMore => sub
522 4     4   10 {my ($o) = @_;
523 4         22 qq/oneOrMore($o)/
524             },
525 4         76 );
526              
527 4         39 my $r = printAsExpr2($dfa, %options); # Create an expression for the DFA
528 4         13 if (1) # Remove any unnecessary outer sequence
529 4         9 {my $s = q/sequence(/;
530 4 50 33     26 if (substr($r, 0, length($s)) eq $s and substr($r, -1, 1) eq q/)/)
531 4         15 {$r = substr($r, length($s), -1)
532             }
533             }
534             $r
535 4         76 }
536              
537             sub printAsRe($) # Print a B<$dfa> as a regular expression.
538 10     10 1 22 {my ($dfa) = @_; # DFA
539              
540             my %options = # Formatting methods
541 56     56   89 (element => sub {my ($e) = @_; $e},
  56         115  
542             choice => sub
543 12     12   71 {my %c = map {$_=>1} @_;
  28         71  
544 12         47 my @c = sort keys %c;
545 12 100       54 return $c[0] if @c == 1;
546 2         6 my $c = join ' | ', @c;
547 2         9 qq/($c)/
548             },
549             sequence => sub
550 34 100   34   104 {return $_[0] if @_ == 1;
551 8         23 my $s = join ' ', @_;
552 8         29 qq/($s)/
553             },
554 24     24   40 zeroOrMore => sub {my ($z) = @_; qq/$z*/},
  24         59  
555 22     22   39 oneOrMore => sub {my ($z) = @_; qq/$z+/},
  22         131  
556 10         146 );
557              
558 10         68 printAsExpr2($dfa, %options); # Create an expression for the DFA
559             }
560              
561             sub parseDtdElementAST($) # Convert the Dtd Element definition in B<$string> to a parse tree.
562 3     3 1 8 {my ($string) = @_; # String representation of DTD element expression
563             package dtdElementDfa;
564 1     1   18 use Carp;
  1         2  
  1         78  
565 1     1   6 use Data::Dump qw(dump);
  1         2  
  1         55  
566 1     1   8 use Data::Table::Text qw(:all);
  1         2  
  1         1715  
567              
568             sub element($) # An element
569 11     11   21 {my ($e) = @_;
570 11         95 bless ['element', $e]
571             }
572              
573             sub multiply # Zero or more, one or more, optional
574 3     3   10 {my ($l, $r) = @_;
575             my $o = sub
576 3 50   3   16 {return q(zeroOrMore) if $r eq q(*);
577 0 0       0 return q(oneOrMore) if $r eq q(+);
578 0 0       0 return q(optional) if $r eq q(?);
579 0         0 confess "Unexpected multiplier $r";
580 3         20 }->();
581 3         24 bless [$o, $l];
582             }
583              
584             sub choice # Choice
585 2     2   6 {my ($l, $r) = @_;
586 2         11 bless ["choice", $l, $r];
587             }
588              
589             sub sequence # Sequence
590 6     6   12 {my ($l, $r) = @_;
591 6         41 bless ["sequence", $l, $r];
592             }
593              
594             use overload
595 1         12 '**' => \&multiply,
596             '*' => \&choice,
597 1     1   9 '+' => \&sequence;
  1         1  
598              
599             sub parse($) # Parse a string
600 3     3   9 {my ($S) = @_; # String
601 3         7 my $s = $S;
602 3         9 $s =~ s(#PCDATA) (PCDATA)gs;
603 3         45 $s =~ s(((\w|-)+)) (element(q($1)))gs; # Word
604 3         32 $s =~ s(\)\s*([*+?])) (\) ** q($1))gs;
605 3         13 $s =~ s(\|) (*)gs;
606 3         18 $s =~ s(,\s+) (+)gs;
607 3         322 my $r = eval $s;
608 3 50       19 say STDERR "$@\n$S\n$s\n" if $@;
609 3         18 $r
610             }
611              
612 3         13 parse($string) # Parse the DTD element expression into a tree
613             }
614              
615             sub parseDtdElement($) # Convert the L <>DTD> Element definition in the specified B<$string> to a DFA.
616 2     2 1 6 {my ($string) = @_; # DTD element expression string
617 2         9 fromExpr parseDtdElementAST($string) # Create a DFA from a parse tree created from the Dtd element expression string
618             }
619              
620             #D1 Paths # Find paths in a DFA.
621              
622             sub subArray($$) #P Whether the second array is contained within the first.
623 36     36 1 64 {my ($A, $B) = @_; # Exterior array, interior array
624 36 100       183 return 1 unless @$B; # The empty set is contained by every set
625 27         47 my @a = @$A;
626 27         50 my ($b, @b) = @$B; # Next element to match in the second array
627 27         48 while(@a) # Each start position in the first array
628 48         71 {my $a = shift @a;
629 48 100 100     139 return 1 if $a eq $b and __SUB__->([@a], [@b]) # Current position matches and remainder of second array is contained in the remainder of the first array
630             }
631             0
632 9         33 }
633              
634             sub removeLongerPathsThatContainShorterPaths($) #P Remove longer paths that contain shorter paths.
635 0     0 1 0 {my ($paths) = @_; # Paths
636 0         0 my @paths = sort keys %$paths; # Paths in definite order
637 0         0 for my $p(@paths) # Each long path
638 0 0       0 {next unless exists $$paths{$p}; # Long path still present
639 0         0 for my $q(@paths) # Each short path
640 0 0       0 {next unless exists $$paths{$q}; # Short path still present
641 0 0 0     0 delete $$paths{$p} if $p ne $q and subArray($$paths{$p}, $$paths{$q}); # Remove long path if it contains short path
642             }
643             }
644             }
645              
646             sub shortPaths($) # Find a set of paths that reach every state in the DFA with each path terminating in a final state.
647             {my ($dfa) = @_; # DFA
648             my %paths; # {path => [transitions]} the transitions in each path
649             my @path; # Transitions in the current path
650             my %seen; # {state} states already seen in this path
651              
652             my sub check($) # Check remaining states from the specified state
653             {my ($state) = @_; # Check from this state
654             $paths{join ' ', @path} = [@path] if $$dfa{$state}->final; # Save non repeating path at a final state
655             my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
656             for my $symbol(sort keys %$transitions) # Each transition from the state being checked not already seen
657             {my $new = $$transitions{$symbol}; # New state reached by transition from state being checked
658             push @path, $symbol; # Add transition to path
659             if (!$seen{$new}) # New state has not been visited yet
660             {$seen{$new}++; # Mark state as already been seen
661             __SUB__->($new); # Check from new state
662             delete $seen{$new} # Mark state as not already been seen
663             }
664             pop @path; # Remove current transition from path
665             }
666             }
667              
668             $seen{0}++; # Mark start state as seen
669             check(0); # Start at the start state
670              
671             dumpFile($logFile, \%paths) if -e $logFile; # Log the result if requested
672              
673             \%paths # Hash of non repeating paths
674             } # shortPaths
675              
676             sub longPaths($) # Find a set of paths that traverse each transition in the DFA with each path terminating in a final state.
677             {my ($dfa) = @_; # DFA
678             my %paths; # {path => [transitions]} the transitions in each path
679             my @path; # Transitions in the current path
680             my %seen; # {state} states already seen in this path so we can avoid loops
681              
682             my sub check($) # Check remaining states from the specified state
683             {my ($state) = @_; # Check from this state
684             $paths{join ' ', @path} = [@path] if $$dfa{$state}->final; # Save non repeating path at a final state
685             my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
686             for my $symbol(sort keys %$transitions) # Each transition from the state being checked not already seen
687             {my $new = $$transitions{$symbol}; # New state reached by transition from state being checked
688             if (!$seen{$state}{$symbol}++) # Mark state as already been seen
689             {push @path, $symbol; # Add transition to path
690             __SUB__->($new); # Check from new state
691             pop @path; # Remove current transition from path
692             delete $seen{$state}{$symbol} # Mark state as not already been seen
693             }
694             }
695             }
696              
697             check(0); # Start at the start state
698              
699             dumpFile($logFile, \%paths) if -e $logFile; # Log the result if requested
700              
701             \%paths # Hash of non repeating paths
702             } # longPaths
703              
704             sub loops($) # Find the non repeating loops from each state.
705             {my ($dfa) = @_; # DFA
706             my %loops; #{state=>[[non repeating loop through states]]}
707              
708             my sub loopsFromState($) # Find loops starting at this state
709             {my ($start) = @_; # Check from this state
710              
711             my @path; # Transitions in the current path
712             my %seen; # {state} states already seen in this path
713              
714             my sub check($) # Check remaining states from the specified state
715             {my ($state) = @_; # Check from this state
716             my $transitions = $$dfa{$state}->transitions; # Transitions from state being checked
717             for my $symbol(sort keys %$transitions) # Each transition from the state being checked not already seen
718             {my $new = $$transitions{$symbol}; # New state reached by transition from state being checked
719             push @path, $symbol; # Add transition to path
720             push $loops{$start}->@*, [@path] if $new == $start; # Save loop
721             if (!$seen{$new}) # New state has not been visited yet
722             {$seen{$new}++; # Mark state as already been seen
723             __SUB__->($new); # Check from new state
724             delete $seen{$new} # Mark state as not already been seen
725             }
726             pop @path; # Remove current transition from path
727             }
728             }
729              
730             $seen{$start}++; # Mark start state as seen
731             check($start); # Start at the start state
732             }
733              
734             loopsFromState($_) for sort keys %$dfa; # Loops from each state
735             dumpFile($logFile, \%loops) if -e $logFile; # Log the result if requested
736              
737             \%loops
738             } # loops
739              
740             #D1 Parser methods # Use the DFA to parse a sequence of symbols
741              
742             sub Data::DFA::Parser::accept($$) # Using the specified B<$parser>, accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message that such a move is not possible.
743 230     230   388 {my ($parser, $symbol) = @_; # DFA Parser, next symbol to be processed by the finite state automaton
744 230         3933 my $dfa = $parser->dfa; # Dfa
745 230         4137 my $observer = $parser->observer; # Optional observer
746 230         4111 my $transitions = $$dfa{$parser->state}->transitions; # Transitions for current state
747 230         4595 my $nextState = $$transitions{$symbol}; # Target state transitioned to
748 230 100       501 if (defined $nextState) # Valid target state
749 197 100       484 {$observer->($parser, $symbol, $nextState) if $observer; # Log transition if required
750 197         3108 $parser->state = $nextState; # Enter next state
751 197         706 push @{$parser->processed}, $symbol; # Save transition symbol
  197         3008  
752 197         1041 return 1; # Success
753             }
754             else # No such transition
755 33         162 {$parser->{next} = [my @next = sort keys %$transitions]; # Valid symbols
756 33         61 my @processed = @{$parser->processed}; # Symbols processed successfully
  33         570  
757 33         666 $parser->fail = $symbol; # Failing symbol
758              
759 33         203 push my @m, "Already processed: ". join(' ', @processed); # Create error message
760              
761 33 100       91 if (scalar(@next) > 0) # Expected
762 31         77 {push @m, "Expected one of : ". join(' ', @next);
763             }
764             else
765 2         3 {push @m, "Expected nothing more.";
766             }
767              
768 33         76 push @m, "But found : ". $symbol, ""; # Found
769              
770 33         226 die join "\n", @m;
771             }
772             }
773              
774             sub Data::DFA::Parser::final($) # Returns whether the specified B<$parser> is in a final state or not.
775 72     72   143 {my ($parser) = @_; # DFA Parser
776 72         1173 my $dfa = $parser->dfa;
777 72         1317 my $state = $parser->state;
778 72         1357 $$dfa{$state}->final
779             }
780              
781             sub Data::DFA::Parser::next($) # Returns an array of symbols that would be accepted in the current state by the specified B<$parser>.
782 1     1   4 {my ($parser) = @_; # DFA Parser
783 1         19 my $dfa = $parser->dfa;
784 1         22 my $state = $parser->state;
785 1         21 my $transitions = $$dfa{$state}->transitions;
786 1         16 sort keys %$transitions
787             }
788              
789             sub Data::DFA::Parser::accepts($@) # Confirm that the specified B<$parser> accepts an array representing a sequence of symbols.
790 103     103   7570 {my ($parser, @symbols) = @_; # DFA Parser, array of symbols
791 103         205 for my $symbol(@symbols) # Parse the symbols
792 227         332 {eval {$parser->accept($symbol)}; # Try to accept a symbol
  227         428  
793 227 50 66     613 confess "Error in observer: $@" if $@ and $@ !~ m(Already processed);
794 227 100       583 return 0 if $@; # Failed
795             }
796             $parser->final # Confirm we are in an end state
797 71         164 }
798              
799             #D1 Data Structures # Data structures used by this package.
800              
801             #D0
802             #-------------------------------------------------------------------------------
803             # Export
804             #-------------------------------------------------------------------------------
805              
806 1     1   1931 use Exporter qw(import);
  1         3  
  1         34  
807              
808 1     1   4 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  1         12  
  1         478  
809              
810             @ISA = qw(Exporter);
811             @EXPORT_OK = qw(
812             choice
813             fromExpr
814             fromNfa
815             element
816             except
817             oneOrMore optional
818             parser
819             print
820             sequence
821             zeroOrMore
822             );
823             %EXPORT_TAGS = (all=>[@EXPORT, @EXPORT_OK]);
824              
825             # podDocumentation
826              
827             =pod
828              
829             =encoding utf-8
830              
831             =head1 Name
832              
833             Data::DFA - Deterministic finite state parser from regular expression.
834              
835             =head1 Synopsis
836              
837             Create a deterministic finite state parser to recognize sequences of symbols
838             that match a given regular expression.
839              
840             To recognize sequences of symbols drawn from B<'a'..'e'> that match
841             the regular expression: B:
842              
843             Create the parser:
844              
845             my $dfa = fromExpr
846             ("a",
847             oneOrMore
848             (choice(qw(b c))),
849             optional("d"),
850             "e"
851             );
852              
853             Recognize sequences of symbols:
854              
855             ok $dfa->parser->accepts(qw(a b e));
856             ok $dfa->parser->accepts(qw(a b c e));
857             ok !$dfa->parser->accepts(qw(a d));
858             ok !$dfa->parser->accepts(qw(a c d));
859             ok $dfa->parser->accepts(qw(a c d e));
860              
861             Print the transition table:
862              
863             is_deeply $dfa->print("a(b|c)+d?e"), <
864             a(b|c)+d?e
865             State Final Symbol Target Final
866             1 0 a 4
867             2 1 b 1
868             3 c 1
869             4 d 2
870             5 e 3 1
871             6 2 e 3 1
872             7 3 1 1
873             8 4 b 1
874             9 c 1
875             END
876              
877             Discover why a sequence cannot be recognized:
878              
879             my $parser = $dfa->parser;
880              
881             eval { $parser->accept($_) } for qw(a b a);
882              
883             is_deeply $@, <
884             Already processed: a b
885             Expected one of : b c d e
886             But found : a
887             END
888              
889             is_deeply $parser->fail, qq(a);
890             is_deeply [$parser->next], [qw(b c d e)];
891             is_deeply $parser->processed, [qw(a b)];
892              
893             ok !$parser->final;
894              
895             To construct and parse regular expressions in the format used by B
896             definitions in Ls used to validate L:
897              
898             is_deeply
899             parseDtdElement(q(a, b*, c))->printAsExpr,
900             q/element(q(a)), zeroOrMore(element(q(b))), element(q(c))/;
901              
902             =head1 Description
903              
904             Deterministic finite state parser from regular expression.
905              
906              
907             Version 20201030.
908              
909              
910             The following sections describe the methods in each functional area of this
911             module. For an alphabetic listing of all methods by name see L.
912              
913              
914              
915             =head1 Construct regular expression
916              
917             Construct a regular expression that defines the language to be parsed using the following combining operations:
918              
919             =head2 element($label)
920              
921             One element.
922              
923             Parameter Description
924             1 $label Transition symbol
925              
926             B
927              
928              
929             my $dfa = fromExpr # Construct DFA
930             ("a",
931             oneOrMore(choice(qw(b c))),
932             optional("d"),
933             "e"
934             );
935             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
936            
937             my $dfa = fromExpr # Construct DFA
938            
939             (element("a"), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
940              
941            
942             oneOrMore(choice(element("b"), element("c"))), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
943              
944            
945             optional(element("d")), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
946              
947            
948             element("e") # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
949              
950             );
951             my $parser = $dfa->parser; # New parser
952            
953             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
954            
955             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
956             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
957            
958             ok !$parser->final; # Not in a final state
959            
960             ok $dfa->dumpAsJson eq <
961             {
962             "finalStates" : {
963             "0" : null,
964             "1" : null,
965             "2" : null,
966             "4" : null,
967             "5" : 1
968             },
969             "transitions" : {
970             "0" : {
971             "a" : "2"
972             },
973             "1" : {
974             "b" : "1",
975             "c" : "1",
976             "d" : "4",
977             "e" : "5"
978             },
979             "2" : {
980             "b" : "1",
981             "c" : "1"
982             },
983             "4" : {
984             "e" : "5"
985             }
986             }
987             }
988             END
989            
990              
991             This is a static method and so should either be imported or invoked as:
992              
993             Data::DFA::element
994              
995              
996             =head2 sequence(@elements)
997              
998             Sequence of elements.
999              
1000             Parameter Description
1001             1 @elements Elements
1002              
1003             B
1004              
1005              
1006             my $dfa = fromExpr # Construct DFA
1007            
1008             (zeroOrMore(sequence('a'..'c')), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1009              
1010             except('b'..'d')
1011             );
1012            
1013             ok $dfa->parser->accepts(qw(a b c a ));
1014             ok !$dfa->parser->accepts(qw(a b c a b));
1015             ok !$dfa->parser->accepts(qw(a b c a c));
1016             ok !$dfa->parser->accepts(qw(a c c a b c));
1017            
1018            
1019             ok $dfa->print(q(Test)) eq <
1020             Test
1021             State Final Symbol Target Final
1022             1 0 a 1 1
1023             2 1 1 b 2
1024             3 2 c 0
1025             END
1026            
1027              
1028             This is a static method and so should either be imported or invoked as:
1029              
1030             Data::DFA::sequence
1031              
1032              
1033             =head2 optional(@element)
1034              
1035             An optional sequence of element.
1036              
1037             Parameter Description
1038             1 @element Elements
1039              
1040             B
1041              
1042              
1043             my $dfa = fromExpr # Construct DFA
1044             ("a",
1045             oneOrMore(choice(qw(b c))),
1046            
1047             optional("d"), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1048              
1049             "e"
1050             );
1051             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1052            
1053             my $dfa = fromExpr # Construct DFA
1054             (element("a"),
1055             oneOrMore(choice(element("b"), element("c"))),
1056            
1057             optional(element("d")), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1058              
1059             element("e")
1060             );
1061             my $parser = $dfa->parser; # New parser
1062            
1063             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1064            
1065             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1066             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1067            
1068             ok !$parser->final; # Not in a final state
1069            
1070             ok $dfa->dumpAsJson eq <
1071             {
1072             "finalStates" : {
1073             "0" : null,
1074             "1" : null,
1075             "2" : null,
1076             "4" : null,
1077             "5" : 1
1078             },
1079             "transitions" : {
1080             "0" : {
1081             "a" : "2"
1082             },
1083             "1" : {
1084             "b" : "1",
1085             "c" : "1",
1086             "d" : "4",
1087             "e" : "5"
1088             },
1089             "2" : {
1090             "b" : "1",
1091             "c" : "1"
1092             },
1093             "4" : {
1094             "e" : "5"
1095             }
1096             }
1097             }
1098             END
1099            
1100              
1101             This is a static method and so should either be imported or invoked as:
1102              
1103             Data::DFA::optional
1104              
1105              
1106             =head2 zeroOrMore(@element)
1107              
1108             Zero or more repetitions of a sequence of elements.
1109              
1110             Parameter Description
1111             1 @element Elements
1112              
1113             B
1114              
1115              
1116             my $dfa = fromExpr # Construct DFA
1117            
1118             (zeroOrMore(sequence('a'..'c')), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1119              
1120             except('b'..'d')
1121             );
1122            
1123             ok $dfa->parser->accepts(qw(a b c a ));
1124             ok !$dfa->parser->accepts(qw(a b c a b));
1125             ok !$dfa->parser->accepts(qw(a b c a c));
1126             ok !$dfa->parser->accepts(qw(a c c a b c));
1127            
1128            
1129             ok $dfa->print(q(Test)) eq <
1130             Test
1131             State Final Symbol Target Final
1132             1 0 a 1 1
1133             2 1 1 b 2
1134             3 2 c 0
1135             END
1136            
1137              
1138             This is a static method and so should either be imported or invoked as:
1139              
1140             Data::DFA::zeroOrMore
1141              
1142              
1143             =head2 oneOrMore(@element)
1144              
1145             One or more repetitions of a sequence of elements.
1146              
1147             Parameter Description
1148             1 @element Elements
1149              
1150             B
1151              
1152              
1153             my $dfa = fromExpr # Construct DFA
1154             ("a",
1155            
1156             oneOrMore(choice(qw(b c))), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1157              
1158             optional("d"),
1159             "e"
1160             );
1161             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1162            
1163             my $dfa = fromExpr # Construct DFA
1164             (element("a"),
1165            
1166             oneOrMore(choice(element("b"), element("c"))), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1167              
1168             optional(element("d")),
1169             element("e")
1170             );
1171             my $parser = $dfa->parser; # New parser
1172            
1173             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1174            
1175             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1176             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1177            
1178             ok !$parser->final; # Not in a final state
1179            
1180             ok $dfa->dumpAsJson eq <
1181             {
1182             "finalStates" : {
1183             "0" : null,
1184             "1" : null,
1185             "2" : null,
1186             "4" : null,
1187             "5" : 1
1188             },
1189             "transitions" : {
1190             "0" : {
1191             "a" : "2"
1192             },
1193             "1" : {
1194             "b" : "1",
1195             "c" : "1",
1196             "d" : "4",
1197             "e" : "5"
1198             },
1199             "2" : {
1200             "b" : "1",
1201             "c" : "1"
1202             },
1203             "4" : {
1204             "e" : "5"
1205             }
1206             }
1207             }
1208             END
1209            
1210              
1211             This is a static method and so should either be imported or invoked as:
1212              
1213             Data::DFA::oneOrMore
1214              
1215              
1216             =head2 choice(@elements)
1217              
1218             Choice from amongst one or more elements.
1219              
1220             Parameter Description
1221             1 @elements Elements to be chosen from
1222              
1223             B
1224              
1225              
1226             my $dfa = fromExpr # Construct DFA
1227             ("a",
1228            
1229             oneOrMore(choice(qw(b c))), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1230              
1231             optional("d"),
1232             "e"
1233             );
1234             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1235            
1236             my $dfa = fromExpr # Construct DFA
1237             (element("a"),
1238            
1239             oneOrMore(choice(element("b"), element("c"))), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1240              
1241             optional(element("d")),
1242             element("e")
1243             );
1244             my $parser = $dfa->parser; # New parser
1245            
1246             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1247            
1248             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1249             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1250            
1251             ok !$parser->final; # Not in a final state
1252            
1253             ok $dfa->dumpAsJson eq <
1254             {
1255             "finalStates" : {
1256             "0" : null,
1257             "1" : null,
1258             "2" : null,
1259             "4" : null,
1260             "5" : 1
1261             },
1262             "transitions" : {
1263             "0" : {
1264             "a" : "2"
1265             },
1266             "1" : {
1267             "b" : "1",
1268             "c" : "1",
1269             "d" : "4",
1270             "e" : "5"
1271             },
1272             "2" : {
1273             "b" : "1",
1274             "c" : "1"
1275             },
1276             "4" : {
1277             "e" : "5"
1278             }
1279             }
1280             }
1281             END
1282            
1283              
1284             This is a static method and so should either be imported or invoked as:
1285              
1286             Data::DFA::choice
1287              
1288              
1289             =head2 except(@elements)
1290              
1291             Choice from amongst all symbols except the ones mentioned
1292              
1293             Parameter Description
1294             1 @elements Elements to be chosen from
1295              
1296             B
1297              
1298              
1299             my $dfa = fromExpr # Construct DFA
1300             (zeroOrMore(sequence('a'..'c')),
1301            
1302             except('b'..'d') # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1303              
1304             );
1305            
1306             ok $dfa->parser->accepts(qw(a b c a ));
1307             ok !$dfa->parser->accepts(qw(a b c a b));
1308             ok !$dfa->parser->accepts(qw(a b c a c));
1309             ok !$dfa->parser->accepts(qw(a c c a b c));
1310            
1311            
1312             ok $dfa->print(q(Test)) eq <
1313             Test
1314             State Final Symbol Target Final
1315             1 0 a 1 1
1316             2 1 1 b 2
1317             3 2 c 0
1318             END
1319            
1320              
1321             This is a static method and so should either be imported or invoked as:
1322              
1323             Data::DFA::except
1324              
1325              
1326             =head2 fromExpr(@expression)
1327              
1328             Create a DFA parser from a regular B<@expression>.
1329              
1330             Parameter Description
1331             1 @expression Regular expression
1332              
1333             B
1334              
1335              
1336            
1337             my $dfa = fromExpr # Construct DFA # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1338              
1339             ("a",
1340             oneOrMore(choice(qw(b c))),
1341             optional("d"),
1342             "e"
1343             );
1344             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1345            
1346            
1347             my $dfa = fromExpr # Construct DFA # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1348              
1349             (element("a"),
1350             oneOrMore(choice(element("b"), element("c"))),
1351             optional(element("d")),
1352             element("e")
1353             );
1354             my $parser = $dfa->parser; # New parser
1355            
1356             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1357            
1358             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1359             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1360            
1361             ok !$parser->final; # Not in a final state
1362            
1363             ok $dfa->dumpAsJson eq <
1364             {
1365             "finalStates" : {
1366             "0" : null,
1367             "1" : null,
1368             "2" : null,
1369             "4" : null,
1370             "5" : 1
1371             },
1372             "transitions" : {
1373             "0" : {
1374             "a" : "2"
1375             },
1376             "1" : {
1377             "b" : "1",
1378             "c" : "1",
1379             "d" : "4",
1380             "e" : "5"
1381             },
1382             "2" : {
1383             "b" : "1",
1384             "c" : "1"
1385             },
1386             "4" : {
1387             "e" : "5"
1388             }
1389             }
1390             }
1391             END
1392            
1393              
1394             This is a static method and so should either be imported or invoked as:
1395              
1396             Data::DFA::fromExpr
1397              
1398              
1399             =head2 univalent($dfa)
1400              
1401             Check that the L is univalent: a univalent L has a mapping from symbols to states. Returns a hash showing the mapping from symbols to states if the L is univalent, else returns B.
1402              
1403             Parameter Description
1404             1 $dfa Dfa to check
1405              
1406             =head1 Print
1407              
1408             Pritn the Dfa in various ways.
1409              
1410             =head2 print($dfa, $title)
1411              
1412             Print the specified B<$dfa> using the specified B<$title>.
1413              
1414             Parameter Description
1415             1 $dfa DFA
1416             2 $title Optional title
1417              
1418             B
1419              
1420              
1421             my $dfa = fromExpr # Construct DFA
1422             (zeroOrMore(sequence('a'..'c')),
1423             except('b'..'d')
1424             );
1425            
1426             ok $dfa->parser->accepts(qw(a b c a ));
1427             ok !$dfa->parser->accepts(qw(a b c a b));
1428             ok !$dfa->parser->accepts(qw(a b c a c));
1429             ok !$dfa->parser->accepts(qw(a c c a b c));
1430            
1431            
1432            
1433             ok $dfa->print(q(Test)) eq <
1434              
1435             Test
1436             State Final Symbol Target Final
1437             1 0 a 1 1
1438             2 1 1 b 2
1439             3 2 c 0
1440             END
1441            
1442              
1443             =head2 symbols($dfa)
1444              
1445             Return an array of all the symbols accepted by a B<$dfa>.
1446              
1447             Parameter Description
1448             1 $dfa DFA
1449              
1450             B
1451              
1452              
1453             my $dfa = fromExpr # Construct DFA
1454             ("a",
1455             oneOrMore(choice(qw(b c))),
1456             optional("d"),
1457             "e"
1458             );
1459            
1460             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1461              
1462            
1463              
1464             =head2 parser($dfa, $observer)
1465              
1466             Create a parser from a B<$dfa> constructed from a regular expression.
1467              
1468             Parameter Description
1469             1 $dfa Deterministic finite state automaton
1470             2 $observer Optional observer
1471              
1472             B
1473              
1474              
1475             my $dfa = fromExpr # Construct DFA
1476             ("a",
1477             oneOrMore(choice(qw(b c))),
1478             optional("d"),
1479             "e"
1480             );
1481             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1482            
1483             my $dfa = fromExpr # Construct DFA
1484             (element("a"),
1485             oneOrMore(choice(element("b"), element("c"))),
1486             optional(element("d")),
1487             element("e")
1488             );
1489            
1490             my $parser = $dfa->parser; # New parser # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1491              
1492            
1493            
1494             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1495              
1496            
1497            
1498             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1499              
1500            
1501             is_deeply $parser->processed, [qw(a b)]; # Symbols processed # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1502              
1503            
1504            
1505             ok !$parser->final; # Not in a final state # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1506              
1507            
1508             ok $dfa->dumpAsJson eq <
1509             {
1510             "finalStates" : {
1511             "0" : null,
1512             "1" : null,
1513             "2" : null,
1514             "4" : null,
1515             "5" : 1
1516             },
1517             "transitions" : {
1518             "0" : {
1519             "a" : "2"
1520             },
1521             "1" : {
1522             "b" : "1",
1523             "c" : "1",
1524             "d" : "4",
1525             "e" : "5"
1526             },
1527             "2" : {
1528             "b" : "1",
1529             "c" : "1"
1530             },
1531             "4" : {
1532             "e" : "5"
1533             }
1534             }
1535             }
1536             END
1537            
1538              
1539             =head2 dumpAsJson($dfa)
1540              
1541             Create a JSON string representing a B<$dfa>.
1542              
1543             Parameter Description
1544             1 $dfa Deterministic finite state automaton generated from an expression
1545              
1546             B
1547              
1548              
1549             my $dfa = fromExpr # Construct DFA
1550             ("a",
1551             oneOrMore(choice(qw(b c))),
1552             optional("d"),
1553             "e"
1554             );
1555             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1556            
1557              
1558             =head2 printAsExpr($dfa)
1559              
1560             Print a B<$dfa> as an expression.
1561              
1562             Parameter Description
1563             1 $dfa DFA
1564              
1565             B
1566              
1567              
1568             if (1)
1569             {my $e = q/element(q(a)), zeroOrMore(choice(element(q(b)), element(q(c)))), element(q(d))/;
1570             my $d = eval qq/fromExpr($e)/;
1571             confess $@ if $@;
1572            
1573            
1574             my $E = $d->printAsExpr; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1575              
1576             ok $e eq $E;
1577            
1578             my $R = $d->printAsRe;
1579             ok $R eq q(a (b | c)* d);
1580            
1581             my $D = parseDtdElement(q(a, (b | c)*, d));
1582            
1583             my $S = $D->printAsExpr; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1584              
1585             ok $e eq $S;
1586             }
1587            
1588              
1589             =head2 printAsRe($dfa)
1590              
1591             Print a B<$dfa> as a regular expression.
1592              
1593             Parameter Description
1594             1 $dfa DFA
1595              
1596             B
1597              
1598              
1599             if (1)
1600             {my $e = q/element(q(a)), zeroOrMore(choice(element(q(b)), element(q(c)))), element(q(d))/;
1601             my $d = eval qq/fromExpr($e)/;
1602             confess $@ if $@;
1603            
1604             my $E = $d->printAsExpr;
1605             ok $e eq $E;
1606            
1607            
1608             my $R = $d->printAsRe; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1609              
1610             ok $R eq q(a (b | c)* d);
1611            
1612             my $D = parseDtdElement(q(a, (b | c)*, d));
1613             my $S = $D->printAsExpr;
1614             ok $e eq $S;
1615             }
1616            
1617              
1618             =head2 parseDtdElementAST($string)
1619              
1620             Convert the Dtd Element definition in B<$string> to a parse tree.
1621              
1622             Parameter Description
1623             1 $string String representation of DTD element expression
1624              
1625             B
1626              
1627              
1628             if (1)
1629            
1630             {is_deeply unbless(parseDtdElementAST(q(a, (b | c)*, d))), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1631              
1632             ["sequence",
1633             ["sequence",
1634             ["element", "a"],
1635             ["zeroOrMore", ["choice", ["element", "b"], ["element", "c"]]],
1636             ],
1637             ["element", "d"],
1638             ];
1639             }
1640            
1641              
1642             =head2 parseDtdElement($string)
1643              
1644             Convert the L <>DTD> Element definition in the specified B<$string> to a DFA.
1645              
1646             Parameter Description
1647             1 $string DTD element expression string
1648              
1649             B
1650              
1651              
1652             if (1)
1653             {my $e = q/element(q(a)), zeroOrMore(choice(element(q(b)), element(q(c)))), element(q(d))/;
1654             my $d = eval qq/fromExpr($e)/;
1655             confess $@ if $@;
1656            
1657             my $E = $d->printAsExpr;
1658             ok $e eq $E;
1659            
1660             my $R = $d->printAsRe;
1661             ok $R eq q(a (b | c)* d);
1662            
1663            
1664             my $D = parseDtdElement(q(a, (b | c)*, d)); # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1665              
1666             my $S = $D->printAsExpr;
1667             ok $e eq $S;
1668             }
1669            
1670              
1671             =head1 Paths
1672              
1673             Find paths in a DFA.
1674              
1675             =head2 shortPaths($dfa)
1676              
1677             Find a set of paths that reach every state in the DFA with each path terminating in a final state.
1678              
1679             Parameter Description
1680             1 $dfa DFA
1681              
1682             B
1683              
1684              
1685             if (1)
1686             {my $dfa = fromExpr
1687             (zeroOrMore("a"),
1688             oneOrMore("b"),
1689             optional("c"),
1690             "d"
1691             );
1692            
1693             ok !$dfa->parser->accepts(qw());
1694             ok !$dfa->parser->accepts(qw(a));
1695             ok !$dfa->parser->accepts(qw(b));
1696             ok !$dfa->parser->accepts(qw(c));
1697             ok !$dfa->parser->accepts(qw(d));
1698             ok $dfa->parser->accepts(qw(b c d));
1699             ok $dfa->parser->accepts(qw(b d));
1700             ok !$dfa->parser->accepts(qw(b a));
1701             ok $dfa->parser->accepts(qw(b b d));
1702            
1703            
1704             is_deeply shortPaths ($dfa), { "b c d" => ["b", "c", "d"], "b d" => ["b", "d"] }; # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1705              
1706             is_deeply longPaths($dfa),
1707             {"a b b c d" => ["a", "b", "b", "c", "d"],
1708             "a b b d" => ["a", "b", "b", "d"],
1709             "a b c d" => ["a" .. "d"],
1710             "a b d" => ["a", "b", "d"],
1711             "b b c d" => ["b", "b", "c", "d"],
1712             "b b d" => ["b", "b", "d"],
1713             "b c d" => ["b", "c", "d"],
1714             "b d" => ["b", "d"]};
1715             }
1716            
1717              
1718             =head2 longPaths($dfa)
1719              
1720             Find a set of paths that traverse each transition in the DFA with each path terminating in a final state.
1721              
1722             Parameter Description
1723             1 $dfa DFA
1724              
1725             B
1726              
1727              
1728             if (1)
1729             {my $dfa = fromExpr
1730             (zeroOrMore("a"),
1731             oneOrMore("b"),
1732             optional("c"),
1733             "d"
1734             );
1735            
1736             ok !$dfa->parser->accepts(qw());
1737             ok !$dfa->parser->accepts(qw(a));
1738             ok !$dfa->parser->accepts(qw(b));
1739             ok !$dfa->parser->accepts(qw(c));
1740             ok !$dfa->parser->accepts(qw(d));
1741             ok $dfa->parser->accepts(qw(b c d));
1742             ok $dfa->parser->accepts(qw(b d));
1743             ok !$dfa->parser->accepts(qw(b a));
1744             ok $dfa->parser->accepts(qw(b b d));
1745            
1746             is_deeply shortPaths ($dfa), { "b c d" => ["b", "c", "d"], "b d" => ["b", "d"] };
1747            
1748             is_deeply longPaths($dfa), # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1749              
1750             {"a b b c d" => ["a", "b", "b", "c", "d"],
1751             "a b b d" => ["a", "b", "b", "d"],
1752             "a b c d" => ["a" .. "d"],
1753             "a b d" => ["a", "b", "d"],
1754             "b b c d" => ["b", "b", "c", "d"],
1755             "b b d" => ["b", "b", "d"],
1756             "b c d" => ["b", "c", "d"],
1757             "b d" => ["b", "d"]};
1758             }
1759            
1760              
1761             =head2 loops($dfa)
1762              
1763             Find the non repeating loops from each state.
1764              
1765             Parameter Description
1766             1 $dfa DFA
1767              
1768             B
1769              
1770              
1771             if (1)
1772             {my $d = fromExpr choice
1773             oneOrMore "a",
1774             oneOrMore "b",
1775             oneOrMore "c",
1776             oneOrMore "d";
1777            
1778             is_deeply $d->print("(a(b(c(d)+)+)+)+"), <
1779             (a(b(c(d)+)+)+)+
1780             State Final Symbol Target Final
1781             1 0 a 3
1782             2 1 d 2 1
1783             3 2 1 a 3
1784             4 b 4
1785             5 c 1
1786             6 d 2 1
1787             7 3 b 4
1788             8 4 c 1
1789             END
1790            
1791             ok !$d->parser->accepts(qw());
1792             ok !$d->parser->accepts(qw(a b c));
1793             ok $d->parser->accepts(qw(a b c d));
1794             ok $d->parser->accepts(qw(a b c d b c d d));
1795             ok !$d->parser->accepts(qw(a b c b d c d d));
1796             ok !$d->parser->accepts(qw(a b c d a));
1797            
1798            
1799             is_deeply $d->loops, { # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲
1800              
1801             1 => [["d", "a", "b", "c"], ["d", "b", "c"], ["d", "c"]],
1802             2 => [["a" .. "d"], ["b", "c", "d"], ["c", "d"], ["d"]],
1803             3 => [["b", "c", "d", "a"]],
1804             4 => [["c", "d", "a", "b"], ["c", "d", "b"]]};
1805            
1806             is_deeply shortPaths($d), {"a b c d" => ["a" .. "d"]};
1807             is_deeply longPaths ($d), { "a b c d" => ["a" .. "d"], "a b c d d" => ["a" .. "d", "d"] };
1808            
1809             #say STDERR $d->printAsExpr;
1810             }
1811            
1812              
1813             =head1 Parser methods
1814              
1815             Use the DFA to parse a sequence of symbols
1816              
1817             =head2 Data::DFA::Parser::accept($parser, $symbol)
1818              
1819             Using the specified B<$parser>, accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message that such a move is not possible.
1820              
1821             Parameter Description
1822             1 $parser DFA Parser
1823             2 $symbol Next symbol to be processed by the finite state automaton
1824              
1825             B
1826              
1827              
1828             my $dfa = fromExpr # Construct DFA
1829             (element("a"),
1830             oneOrMore(choice(element("b"), element("c"))),
1831             optional(element("d")),
1832             element("e")
1833             );
1834             my $parser = $dfa->parser; # New parser
1835            
1836             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1837            
1838             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1839             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1840            
1841             ok !$parser->final; # Not in a final state
1842            
1843             ok $dfa->dumpAsJson eq <
1844             {
1845             "finalStates" : {
1846             "0" : null,
1847             "1" : null,
1848             "2" : null,
1849             "4" : null,
1850             "5" : 1
1851             },
1852             "transitions" : {
1853             "0" : {
1854             "a" : "2"
1855             },
1856             "1" : {
1857             "b" : "1",
1858             "c" : "1",
1859             "d" : "4",
1860             "e" : "5"
1861             },
1862             "2" : {
1863             "b" : "1",
1864             "c" : "1"
1865             },
1866             "4" : {
1867             "e" : "5"
1868             }
1869             }
1870             }
1871             END
1872            
1873             my $dfa = fromExpr # Construct DFA
1874             (zeroOrMore(sequence('a'..'c')),
1875             except('b'..'d')
1876             );
1877            
1878             ok $dfa->parser->accepts(qw(a b c a ));
1879             ok !$dfa->parser->accepts(qw(a b c a b));
1880             ok !$dfa->parser->accepts(qw(a b c a c));
1881             ok !$dfa->parser->accepts(qw(a c c a b c));
1882            
1883            
1884             ok $dfa->print(q(Test)) eq <
1885             Test
1886             State Final Symbol Target Final
1887             1 0 a 1 1
1888             2 1 1 b 2
1889             3 2 c 0
1890             END
1891            
1892              
1893             =head2 Data::DFA::Parser::final($parser)
1894              
1895             Returns whether the specified B<$parser> is in a final state or not.
1896              
1897             Parameter Description
1898             1 $parser DFA Parser
1899              
1900             B
1901              
1902              
1903             my $dfa = fromExpr # Construct DFA
1904             (zeroOrMore(sequence('a'..'c')),
1905             except('b'..'d')
1906             );
1907            
1908             ok $dfa->parser->accepts(qw(a b c a ));
1909             ok !$dfa->parser->accepts(qw(a b c a b));
1910             ok !$dfa->parser->accepts(qw(a b c a c));
1911             ok !$dfa->parser->accepts(qw(a c c a b c));
1912            
1913            
1914             ok $dfa->print(q(Test)) eq <
1915             Test
1916             State Final Symbol Target Final
1917             1 0 a 1 1
1918             2 1 1 b 2
1919             3 2 c 0
1920             END
1921            
1922              
1923             =head2 Data::DFA::Parser::next($parser)
1924              
1925             Returns an array of symbols that would be accepted in the current state by the specified B<$parser>.
1926              
1927             Parameter Description
1928             1 $parser DFA Parser
1929              
1930             B
1931              
1932              
1933             my $dfa = fromExpr # Construct DFA
1934             (element("a"),
1935             oneOrMore(choice(element("b"), element("c"))),
1936             optional(element("d")),
1937             element("e")
1938             );
1939             my $parser = $dfa->parser; # New parser
1940            
1941             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
1942            
1943             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
1944             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
1945            
1946             ok !$parser->final; # Not in a final state
1947            
1948             ok $dfa->dumpAsJson eq <
1949             {
1950             "finalStates" : {
1951             "0" : null,
1952             "1" : null,
1953             "2" : null,
1954             "4" : null,
1955             "5" : 1
1956             },
1957             "transitions" : {
1958             "0" : {
1959             "a" : "2"
1960             },
1961             "1" : {
1962             "b" : "1",
1963             "c" : "1",
1964             "d" : "4",
1965             "e" : "5"
1966             },
1967             "2" : {
1968             "b" : "1",
1969             "c" : "1"
1970             },
1971             "4" : {
1972             "e" : "5"
1973             }
1974             }
1975             }
1976             END
1977            
1978              
1979             =head2 Data::DFA::Parser::accepts($parser, @symbols)
1980              
1981             Confirm that the specified B<$parser> accepts an array representing a sequence of symbols.
1982              
1983             Parameter Description
1984             1 $parser DFA Parser
1985             2 @symbols Array of symbols
1986              
1987             B
1988              
1989              
1990             my $dfa = fromExpr # Construct DFA
1991             ("a",
1992             oneOrMore(choice(qw(b c))),
1993             optional("d"),
1994             "e"
1995             );
1996             is_deeply ['a'..'e'], [$dfa->symbols]; # List symbols
1997            
1998             my $dfa = fromExpr # Construct DFA
1999             (element("a"),
2000             oneOrMore(choice(element("b"), element("c"))),
2001             optional(element("d")),
2002             element("e")
2003             );
2004             my $parser = $dfa->parser; # New parser
2005            
2006             eval { $parser->accept($_) } for qw(a b a); # Try to parse a b a
2007            
2008             is_deeply [$parser->next], [qw(b c d e)]; # Next acceptable symbol
2009             is_deeply $parser->processed, [qw(a b)]; # Symbols processed
2010            
2011             ok !$parser->final; # Not in a final state
2012            
2013             ok $dfa->dumpAsJson eq <
2014             {
2015             "finalStates" : {
2016             "0" : null,
2017             "1" : null,
2018             "2" : null,
2019             "4" : null,
2020             "5" : 1
2021             },
2022             "transitions" : {
2023             "0" : {
2024             "a" : "2"
2025             },
2026             "1" : {
2027             "b" : "1",
2028             "c" : "1",
2029             "d" : "4",
2030             "e" : "5"
2031             },
2032             "2" : {
2033             "b" : "1",
2034             "c" : "1"
2035             },
2036             "4" : {
2037             "e" : "5"
2038             }
2039             }
2040             }
2041             END
2042            
2043              
2044             =head1 Data Structures
2045              
2046             Data structures used by this package.
2047              
2048              
2049             =head2 Data::DFA Definition
2050              
2051              
2052             DFA State
2053              
2054              
2055              
2056              
2057             =head3 Output fields
2058              
2059              
2060             =head4 final
2061              
2062             Whether this state is final
2063              
2064             =head4 nfaStates
2065              
2066             Hash whose keys are the NFA states that contributed to this super state
2067              
2068             =head4 pump
2069              
2070             Pumping lemmas for this state
2071              
2072             =head4 sequence
2073              
2074             Sequence of states to final state minus pumped states
2075              
2076             =head4 state
2077              
2078             Name of the state - the join of the NFA keys
2079              
2080             =head4 transitions
2081              
2082             Transitions from this state
2083              
2084              
2085              
2086             =head2 Data::DFA::Parser Definition
2087              
2088              
2089             Parse a sequence of symbols with a DFA
2090              
2091              
2092              
2093              
2094             =head3 Output fields
2095              
2096              
2097             =head4 dfa
2098              
2099             DFA being used
2100              
2101             =head4 fail
2102              
2103             Symbol on which we failed
2104              
2105             =head4 observer
2106              
2107             Optional sub($parser, $symbol, $target) to observe transitions.
2108              
2109             =head4 processed
2110              
2111             Symbols processed
2112              
2113             =head4 state
2114              
2115             Current state
2116              
2117              
2118              
2119             =head2 Data::DFA::State Definition
2120              
2121              
2122             DFA State
2123              
2124              
2125              
2126              
2127             =head3 Output fields
2128              
2129              
2130             =head4 final
2131              
2132             Whether this state is final
2133              
2134             =head4 nfaStates
2135              
2136             Hash whose keys are the NFA states that contributed to this super state
2137              
2138             =head4 pump
2139              
2140             Pumping lemmas for this state
2141              
2142             =head4 sequence
2143              
2144             Sequence of states to final state minus pumped states
2145              
2146             =head4 state
2147              
2148             Name of the state - the join of the NFA keys
2149              
2150             =head4 transitions
2151              
2152             Transitions from this state
2153              
2154              
2155              
2156             =head1 Private Methods
2157              
2158             =head2 newDFA()
2159              
2160             Create a new DFA.
2161              
2162              
2163             =head2 newState(%options)
2164              
2165             Create a new DFA state with the specified options.
2166              
2167             Parameter Description
2168             1 %options DFA state as hash
2169              
2170             =head2 fromNfa($nfa)
2171              
2172             Create a DFA parser from an NFA.
2173              
2174             Parameter Description
2175             1 $nfa Nfa
2176              
2177             =head2 finalState($nfa, $reach)
2178              
2179             Check whether, in the specified B<$nfa>, any of the states named in the hash reference B<$reach> are final. Final states that refer to reduce rules are checked for reduce conflicts.
2180              
2181             Parameter Description
2182             1 $nfa NFA
2183             2 $reach Hash of states in the NFA
2184              
2185             =head2 superState($dfa, $superStateName, $nfa, $symbols, $nfaSymbolTransitions)
2186              
2187             Create super states from existing superstate.
2188              
2189             Parameter Description
2190             1 $dfa DFA
2191             2 $superStateName Start state in DFA
2192             3 $nfa NFA we are converting
2193             4 $symbols Symbols in the NFA we are converting
2194             5 $nfaSymbolTransitions States reachable from each state by symbol
2195              
2196             =head2 superStates($dfa, $SuperStateName, $nfa)
2197              
2198             Create super states from existing superstate.
2199              
2200             Parameter Description
2201             1 $dfa DFA
2202             2 $SuperStateName Start state in DFA
2203             3 $nfa NFA we are tracking
2204              
2205             =head2 transitionOnSymbol($dfa, $superStateName, $symbol)
2206              
2207             The super state reached by transition on a symbol from a specified state.
2208              
2209             Parameter Description
2210             1 $dfa DFA
2211             2 $superStateName Start state in DFA
2212             3 $symbol Symbol
2213              
2214             =head2 renumberDfa($dfa, $initialStateName)
2215              
2216             Renumber the states in the specified B<$dfa>.
2217              
2218             Parameter Description
2219             1 $dfa DFA
2220             2 $initialStateName Initial super state name
2221              
2222             B
2223              
2224              
2225             my $dfa = fromExpr # Construct DFA
2226             (zeroOrMore(sequence('a'..'c')),
2227             except('b'..'d')
2228             );
2229            
2230             ok $dfa->parser->accepts(qw(a b c a ));
2231             ok !$dfa->parser->accepts(qw(a b c a b));
2232             ok !$dfa->parser->accepts(qw(a b c a c));
2233             ok !$dfa->parser->accepts(qw(a c c a b c));
2234            
2235            
2236             ok $dfa->print(q(Test)) eq <
2237             Test
2238             State Final Symbol Target Final
2239             1 0 a 1 1
2240             2 1 1 b 2
2241             3 2 c 0
2242             END
2243            
2244              
2245             =head2 printFinal($final)
2246              
2247             Print a final state
2248              
2249             Parameter Description
2250             1 $final Final State
2251              
2252             =head2 removeDuplicatedStates($dfa)
2253              
2254             Remove duplicated states in a B<$dfa>.
2255              
2256             Parameter Description
2257             1 $dfa Deterministic finite state automaton generated from an expression
2258              
2259             =head2 removeUnreachableStates($dfa)
2260              
2261             Remove unreachable states in a B<$dfa>.
2262              
2263             Parameter Description
2264             1 $dfa Deterministic finite state automaton generated from an expression
2265              
2266             =head2 printAsExpr2($dfa, %options)
2267              
2268             Print a DFA B<$dfa_> in an expression form determined by the specified B<%options>.
2269              
2270             Parameter Description
2271             1 $dfa Dfa
2272             2 %options Options.
2273              
2274             =head2 subArray($A, $B)
2275              
2276             Whether the second array is contained within the first.
2277              
2278             Parameter Description
2279             1 $A Exterior array
2280             2 $B Interior array
2281              
2282             =head2 removeLongerPathsThatContainShorterPaths($paths)
2283              
2284             Remove longer paths that contain shorter paths.
2285              
2286             Parameter Description
2287             1 $paths Paths
2288              
2289              
2290             =head1 Index
2291              
2292              
2293             1 L - Choice from amongst one or more elements.
2294              
2295             2 L - Using the specified B<$parser>, accept the next symbol drawn from the symbol set if possible by moving to a new state otherwise confessing with a helpful message that such a move is not possible.
2296              
2297             3 L - Confirm that the specified B<$parser> accepts an array representing a sequence of symbols.
2298              
2299             4 L - Returns whether the specified B<$parser> is in a final state or not.
2300              
2301             5 L - Returns an array of symbols that would be accepted in the current state by the specified B<$parser>.
2302              
2303             6 L - Create a JSON string representing a B<$dfa>.
2304              
2305             7 L - One element.
2306              
2307             8 L - Choice from amongst all symbols except the ones mentioned
2308              
2309             9 L - Check whether, in the specified B<$nfa>, any of the states named in the hash reference B<$reach> are final.
2310              
2311             10 L - Create a DFA parser from a regular B<@expression>.
2312              
2313             11 L - Create a DFA parser from an NFA.
2314              
2315             12 L - Find a set of paths that traverse each transition in the DFA with each path terminating in a final state.
2316              
2317             13 L - Find the non repeating loops from each state.
2318              
2319             14 L - Create a new DFA.
2320              
2321             15 L - Create a new DFA state with the specified options.
2322              
2323             16 L - One or more repetitions of a sequence of elements.
2324              
2325             17 L - An optional sequence of element.
2326              
2327             18 L - Convert the L <>DTD> Element definition in the specified B<$string> to a DFA.
2328              
2329             19 L - Convert the Dtd Element definition in B<$string> to a parse tree.
2330              
2331             20 L - Create a parser from a B<$dfa> constructed from a regular expression.
2332              
2333             21 L - Print the specified B<$dfa> using the specified B<$title>.
2334              
2335             22 L - Print a B<$dfa> as an expression.
2336              
2337             23 L - Print a DFA B<$dfa_> in an expression form determined by the specified B<%options>.
2338              
2339             24 L - Print a B<$dfa> as a regular expression.
2340              
2341             25 L - Print a final state
2342              
2343             26 L - Remove duplicated states in a B<$dfa>.
2344              
2345             27 L - Remove longer paths that contain shorter paths.
2346              
2347             28 L - Remove unreachable states in a B<$dfa>.
2348              
2349             29 L - Renumber the states in the specified B<$dfa>.
2350              
2351             30 L - Sequence of elements.
2352              
2353             31 L - Find a set of paths that reach every state in the DFA with each path terminating in a final state.
2354              
2355             32 L - Whether the second array is contained within the first.
2356              
2357             33 L - Create super states from existing superstate.
2358              
2359             34 L - Create super states from existing superstate.
2360              
2361             35 L - Return an array of all the symbols accepted by a B<$dfa>.
2362              
2363             36 L - The super state reached by transition on a symbol from a specified state.
2364              
2365             37 L - Check that the L is univalent: a univalent L has a mapping from symbols to states.
2366              
2367             38 L - Zero or more repetitions of a sequence of elements.
2368              
2369             =head1 Installation
2370              
2371             This module is written in 100% Pure Perl and, thus, it is easy to read,
2372             comprehend, use, modify and install via B:
2373              
2374             sudo cpan install Data::DFA
2375              
2376             =head1 Author
2377              
2378             L
2379              
2380             L
2381              
2382             =head1 Copyright
2383              
2384             Copyright (c) 2016-2019 Philip R Brenan.
2385              
2386             This module is free software. It may be used, redistributed and/or modified
2387             under the same terms as Perl itself.
2388              
2389             =cut
2390              
2391              
2392              
2393             # Tests and documentation
2394              
2395             sub test
2396 1     1 0 11 {my $p = __PACKAGE__;
2397 1         9 binmode($_, ":utf8") for *STDOUT, *STDERR;
2398 1 50       70 return if eval "eof(${p}::DATA)";
2399 1         58 my $s = eval "join('', <${p}::DATA>)";
2400 1 50       9 $@ and die $@;
2401 1     1   7 eval $s;
  1     1   2  
  1     1   39  
  1     1   6  
  1         1  
  1         29  
  1         5  
  1         3  
  1         8  
  1         741  
  1         65467  
  1         12  
  1         88  
2402 1 50       9 $@ and die $@;
2403 1         131 1
2404             }
2405              
2406             test unless caller;
2407              
2408             1;
2409             # podDocumentation
2410             __DATA__