File Coverage

blib/lib/Data/NFA.pm
Criterion Covered Total %
statement 270 286 94.4
branch 81 94 86.1
condition 4 8 50.0
subroutine 46 50 92.0
pod 25 33 75.7
total 426 471 90.4


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             #-------------------------------------------------------------------------------
3             # Non deterministic finite state machine from regular expression.
4             # Philip R Brenan at gmail dot com, Appa Apps Ltd Inc., 2018
5             #-------------------------------------------------------------------------------
6             # podDocumentation
7             package Data::NFA;
8             our $VERSION = 20201031;
9             require v5.26;
10 1     1   800 use warnings FATAL => qw(all);
  1         7  
  1         40  
11 1     1   5 use strict;
  1         2  
  1         37  
12 1     1   6 use Carp qw(confess);
  1         2  
  1         82  
13 1     1   531 use Data::Dump qw(dump);
  1         7677  
  1         78  
14 1     1   3624 use Data::Table::Text qw(:all);
  1         137575  
  1         1762  
15 1     1   16 use utf8;
  1         2  
  1         8  
16              
17             # NFA : bless {state=>{symbol=>state}, jumps=>{state=>1}, final=>defined if final, return=>state to return to}
18             # Jumps are instead of a transition not after a transition
19              
20             my $logFile = q(/home/phil/z/z/z/zzz.txt); # Log printed results if developing
21              
22 101     101 0 286 sub Element {q(element)} # Element of a regular expression.
23 43     43 0 127 sub Sequence {q(sequence)} # Sequence of regular expressions.
24 37     37 0 107 sub Optional {q(optional)} # Optional regular expression.
25 41     41 0 130 sub ZeroOrMore{q(zeroOrMore)} # Zero or more instances of a regular expression.
26 24     24 0 77 sub OneOrMore {q(oneOrMore)} # One or more instances of a regular expression.
27 35     35 0 129 sub Choice {q(choice)} # Choice between regular expressions.
28 4     4 0 13 sub Except {q(except)} # Choice between any symbols mentioned so far minus the ones listed.
29              
30             #D1 Construct regular expression # Construct a regular expression that defines the language to be parsed using the following combining operations which can all be imported:
31              
32             sub element($) #S One element. An element can also be represented by a string or number
33 7     7 1 19 {my ($label) = @_; # Transition symbol
34 7         21 [Element, @_]
35             }
36              
37             sub sequence(@) #S Sequence of elements and/or symbols.
38 4     4 1 10 {my (@elements) = @_; # Elements
39 4         11 [Sequence, @elements]
40             }
41              
42             sub optional(@) #S An optional sequence of elements and/or symbols.
43 2     2 1 5 {my (@element) = @_; # Elements
44 2         9 [Optional, @element]
45             }
46              
47             sub zeroOrMore(@) #S Zero or more repetitions of a sequence of elements and/or symbols.
48 8     8 1 21 {my (@element) = @_; # Elements
49 8         23 [ZeroOrMore, @element]
50             }
51              
52             sub oneOrMore(@) #S One or more repetitions of a sequence of elements and/or symbols.
53 2     2 1 7 {my (@element) = @_; # Elements
54 2         8 [OneOrMore, @element]
55             }
56              
57             sub choice(@) #S Choice from amongst one or more elements and/or symbols.
58 15     15 1 43 {my (@elements) = @_; # Elements to be chosen from
59 15         43 [Choice, @elements]
60             }
61              
62             sub except(@) #S Choice from amongst all symbols except the ones mentioned
63 2     2 1 5 {my (@elements) = @_; # Elements not to be chosen from
64 2         8 [Except, @elements]
65             }
66              
67             #D1 Non deterministic finite state machine # Create a non deterministic finite state machine to represent a regular expression.
68              
69             sub newNfa(%) #P Create a new NFA
70 0     0 1 0 {my (%options) = @_; # Options
71 0         0 bless {}, q(Data::NFA);
72             }
73              
74             sub newNfaState(%) #P Create a new NFA state.
75 221     221 1 490 {my (%options) = @_; # Options
76              
77 221         491 my $r = genHash(q(Data::NFA::State), # NFA State
78             transitions => undef, # {symbol => state} : transitions from this state consuming one input symbol
79             jumps => undef, # {to => 1} : jumps from this state not consuming any input symbols
80             final => undef, # Whether this state is final
81             );
82              
83 221         10127 %$r = (%$r, %options);
84              
85 221         571 $r
86             }
87              
88             sub addNewState(%) #P Create a new NFA state and add it to an NFA created with L.
89 0     0 1 0 {my ($nfa) = @_; # Nfa
90 0         0 my $n = keys %$nfa;
91 0         0 $$nfa{$n} = newNfaState;
92             }
93              
94             sub fromExpr2($$$) #P Create an NFA from a regular expression.
95 104     104 1 202 {my ($states, $expr, $symbols) = @_; # States, regular expression constructed from L L L L L L, set of symbols used by the NFA.
96 104   50     216 $states //= {};
97 104     378   341 my $next = sub{scalar keys %$states}; # Next state name
  378         854  
98 104     0   276 my $last = sub{&$next - 1}; # Last state created
  0         0  
99              
100             my $save = sub # Save as a new state
101 201     201   328 {my ($transitions, $jumps, $final) = @_; # Transitions, jumps, final
102 201         330 my $s = $states->{&$next} = newNfaState
103             (transitions=>$transitions, jumps=>$jumps, final=>$final);
104 201         385 $s;
105 104         342 };
106              
107             my $jump = sub # Add jumps
108 53     53   134 {my ($from, @to) = @_;
109 53         91 my $state = $states->{$from};
110 53         1132 $state->jumps->{$_}++ for @to
111 104         302 };
112              
113 104         183 my $start = &$next + 1; # Start state
114 104         350 &$save(undef, {$start=>1}); # Offset the start of each expression by one cell to allow zeroOrMore, oneOrMore to jump back to their beginning without jumping back to the start of a containing choice
115              
116 104 100       235 if (!ref($expr)) # Element not wrapped with element()
117 55         164 {&$save({$expr=>$start+1}, undef);
118             }
119             else
120 49         100 {my ($structure) = @$expr;
121 49 100       96 if ($structure eq Element) # Element
    100          
    100          
    100          
    100          
    100          
    50          
122 10         20 {my (undef, $element) = @$expr;
123 10         32 &$save({$element=>$start+1}, undef);
124             }
125             elsif ($structure eq Sequence) # Sequence of elements
126 4         12 {my (undef, @elements) = @$expr;
127 4         14 $states->fromExpr2($_, $symbols) for @elements;
128             }
129             elsif ($structure eq Optional) # Optional element
130 2         6 {my (undef, @elements) = @$expr;
131 2         26 $states->fromExpr2($_, $symbols) for @elements;
132 2         5 &$jump($start, &$next); # Optional so we have the option of jumping over it
133             }
134             elsif ($structure eq ZeroOrMore) # Zero or more
135 11         29 {my (undef, @elements) = @$expr;
136 11         54 $states->fromExpr2($_, $symbols) for @elements;
137 11         26 &$jump($start, &$next+1); # Optional so we have the option of jumping over it
138 11         84 &$save(undef, {$start=>1}); # Repeated so we have the option of redoing it
139             }
140             elsif ($structure eq OneOrMore) # One or more
141 2         7 {my (undef, @elements) = @$expr;
142 2         10 $states->fromExpr2($_, $symbols) for @elements;
143 2         6 my $N = &$next;
144 2         5 &$save(); # Create new empty state
145 2         6 &$jump($N, $start, $N+1); # Do it again or move on
146             }
147             elsif ($structure eq Choice) # Choice
148 18         45 {my (undef, @elements) = @$expr;
149 18         28 my @fix;
150 18         55 for my $i(keys @elements) # Each element index
151 35         65 {my $element = $elements[$i]; # Each element separate by a gap so we can not jump in then jump out
152 35 100       72 if ($i)
153 17         34 {&$jump($start, &$next)
154             }
155 35         1259 $states->fromExpr2($element, $symbols); # Choice
156 35 100       99 if ($i < $#elements)
157 17         28 {push @fix, &$next;
158 17         29 &$save(); # Fixed later to jump over subsequent choices
159             }
160             }
161 18         31 my $N = &$next; # Fix intermediates
162 18         46 &$jump($_, $N) for @fix;
163             }
164             elsif ($structure eq Except) # Except
165 2         7 {my (undef, @exclude) = @$expr;
166 2 50       7 my %exclude = map{(ref $_ ? $$_[1] : $_)=>1} @exclude; # Names of elements to exclude
  4         17  
167 2         5 my @fix;
168 2         10 my @elements = grep {!$exclude{$_}}
  8         18  
169             sort keys %$symbols; # Each element not excluded
170 2         10 for my $i(keys @elements) # Each element index
171 4         10 {my $element = $elements[$i]; # Each element separate by a gap so we can not jump in then jump out
172 4 100       13 &$jump($start, &$next) if $i;
173 4         27 $states->fromExpr2(element($element), $symbols); # Choice of not excluded symbols
174 4 100       16 if ($i < $#elements)
175 2         6 {push @fix, &$next;
176 2         5 &$save(); # Fixed later to jump over subsequent choices
177             }
178             }
179 2         5 my $N = &$next; # Fix intermediates
180             # &$save();
181 2         8 &$jump($_, $N) for @fix;
182             }
183             else # Unknown request
184 0         0 {confess "Unknown structuring operation: $structure";
185             }
186             }
187 104         839 $states
188             } # fromExpr2
189              
190             sub propagateFinalState($) #P Mark the B<$states> that can reach the final state with a jump as final.
191 20     20 1 42 {my ($states) = @_; # States
192 20         33 my %checked;
193 20         34 for(;;)
194 29         41 {my $changes = 0;
195 29         80 for my $state(values %$states) # Each state
196 349 100       6489 {if (!defined $state->final) # Current state is not a final state
197 282 50       5215 {if (defined $state->jumps)
198 282         5135 {for my $jumpName(sort keys $state->jumps->%*) # Each jump
199 557         2820 {my $jump = $$states{$jumpName};
200 557 100       8712 if (defined(my $final = $jump->final)) # Target state is final
201 38         162 {++$changes;
202 38         598 $state->final = $final; # Mark state as final
203 38         173 last;
204             }
205             }
206             }
207             }
208             }
209 29 100       184 last unless $changes;
210             }
211             } # propagateFinalState
212              
213             sub statesReachableViaJumps($$) #P Find the names of all the B<$states> that can be reached from a specified B<$stateName> via jumps alone.
214 401     401 1 742 {my ($states, $StateName) = @_; # States, name of start state
215 401         487 my %reachable;
216 401         699 my @check = ($StateName);
217 401         487 my %checked;
218              
219 401         711 while(@check) # Reachable from the start state by a single transition after zero or more jumps
220 2571         5206 {my $stateName = pop @check;
221 2571 100       5300 next if $checked{$stateName}++;
222 1587 50       2947 confess "No such state: $stateName" unless my $state = $$states{$stateName};
223 1587         26086 for my $s(sort keys $state->jumps->%*) # States that can be reached via jumps
224 2170         6140 {$reachable{$s}++; # New state to check
225 2170         4061 push @check, $s;
226             }
227             }
228              
229 401         4757 [sort keys %reachable]
230             } # statesReachableViaJumps
231              
232             sub removeEmptyFields($) #P Remove empty fields from the B representing an NFA.
233 20     20 1 38 {my ($states) = @_; # States
234 20         61 for my $state(values %$states) # Remove empty fields
235 221         299 {for(qw(jumps transitions))
236 442 100       983 {delete $$state{$_} unless keys $$state{$_}->%*;
237             }
238 221 100       417 delete $$state{final} unless defined $$state{final};
239             }
240             } # removeEmptyFields
241              
242             sub fromExpr(@) #S Create an NFA from a regular B<@expression>.
243 20     20 1 52 {my (@expression) = @_; # Regular expressions
244 20         44 my $states = bless {};
245 20         40 my %symbols; # Symbols named in expression
246             my $symbols; $symbols = sub # Locate symbols
247 62     62   110 {my ($expr) = @_;
248 62 100       130 if (ref $expr)
249 45 100       94 {$symbols{$$expr[1]}++ if $$expr[0] eq Element; # Add symbol enclosed in element
250 45         108 my ($type, @elements) = @$expr;
251 45         77 for(@elements)
252 68 100       208 {ref $_ ? $symbols->($_) : $symbols{$_}++;
253             }
254             }
255             else # Process sub expressions
256 17         54 {$symbols{$expr}++; # Add symbol not enclosed in element()
257             }
258 20         126 };
259 20         71 $symbols->($_) for @expression; # Locate all symbols
260              
261 20         87 $states->fromExpr2($_, \%symbols) for @expression; # Create state transitions
262 20         44 $states->{keys %$states} = newNfaState(final=>1); # End state
263              
264 20         111 for my $state(sort keys %$states) # Collapse multiple jumps
265             {$$states{$state}->jumps =
266 221         1029 {map {$_=>1} @{statesReachableViaJumps($states, $state)}};
  611         3350  
  221         385  
267             }
268              
269 20         150 $states->propagateFinalState; # Propagate final states
270              
271 20         67 $states->removeEmptyFields; # Remove any empty fields
272              
273 20         437 $states
274             } # fromExpr
275              
276             sub printFinalState($) #P Print the final field of the specified B<$state>.
277 233     233 1 362 {my ($state) = @_; # State
278 233 100       3890 defined($state->final) ? 1 : q();
279             }
280              
281             sub printWithJumps($;$) #P Print the current B<$states> of an NFA with jumps using the specvified B<$title>.
282 21     21 1 56 {my ($states, $title) = @_; # States, optional title
283 21         30 my @o;
284 21         57 push @o, [qw(Location F Transitions Jumps)];
285 21         88 for(sort{$a <=> $b} keys %$states)
  593         1049  
286 233         75683 {my $d = $states->{$_};
287 233         318 my @j = sort {$a <=> $b} keys %{$d->jumps};
  1224         2174  
  233         4815  
288 233         1059 my $f = printFinalState($d);
289 233 100       4977 push @o, [sprintf("%4d", $_), $f,
290             dump($d->transitions),
291             dump(@j ? [@j] : undef)];
292             }
293 21         3300 my $t = formatTableBasic([@o]);
294 21 50       17165 $title ? "$title\n$t" : $t
295             }
296              
297             sub printWithOutJumps($$) #P Print the current B<$states> of an NFA without jumps using the specified B<$title>.
298 0     0 1 0 {my ($states, $title) = @_; # States, title.
299 0         0 my @o;
300 0         0 push @o, [qw(Location F Transitions)];
301 0         0 for(sort{$a <=> $b} keys %$states)
  0         0  
302 0         0 {my $d = $states->{$_};
303 0         0 my $f = printFinalState($d);
304 0         0 push @o, [sprintf("%4d", $_), $f,
305             dump($d->transitions)];
306             }
307 0         0 "$title\n". formatTableBasic([@o]);
308             }
309              
310             sub print($$) # Print the current B<$states> of the non deterministic finite state automaton using the specified B<$title>. If it is non deterministic, the non deterministic jumps will be shown as well as the transitions table. If deterministic, only the transitions table will be shown.
311 21     21 1 63 {my ($states, $title) = @_; # States, title
312 21         36 my $j = 0; # Number of non deterministic jumps encountered
313 21         112 for(sort{$a <=> $b} keys %$states)
  593         785  
314 233         334 {my $d = $states->{$_};
315 233         260 my @j = sort keys %{$d->jumps};
  233         3780  
316 233 100       1578 ++$j if @j > 0;
317             }
318              
319 21 50       91 my $r = $j ? &printWithJumps(@_) : &printWithOutJumps(@_); # Print
320              
321 21 50       385 owf($logFile, $r) if -e $logFile; # Log the result if requested
322 21         209 $r # Return the result
323             }
324              
325             sub symbols($) # Return an array of all the transition symbols.
326 2     2 1 5 {my ($states) = @_; # States
327 2         5 my %s;
328 2         8 for my $d(values %$states)
329 28 100       542 {if ($d->transitions)
330 7         130 {$s{$_}++ for sort keys $d->transitions->%*;
331             }
332             }
333 2         22 sort keys %s
334             }
335              
336             sub isFinal($$) # Whether, in the B<$states> specifying an NFA the named state B<$state> is a final state.
337 2     2 1 5 {my ($states, $state) = @_; # States, name of state to test
338 2         38 my $f = $$states{$state}->final;
339 2 100       12 my $F = defined($f) ? $f : undef; # Defined yields "" for false which is not what we want
340 2         37 $F
341             }
342              
343             sub statesReachableViaSymbol($$$$) #P Find the names of all the states that can be reached from a specified state via a specified symbol and all the jumps available.
344 457     457 1 782 {my ($states, $StateName, $symbol, $cache) = @_; # States, name of start state, symbol to reach on, a hash to be used as a cache
345 457         600 my %reachable;
346 457         695 my @check = ($StateName);
347 457         594 my %checked;
348              
349 457         773 while(@check) # Reachable from the start state by a single transition after zero or more jumps
350 2241         7880 {my $stateName = pop @check;
351 2241 100       4894 next if $checked{$stateName}++;
352 1367 50       2471 confess "No such state: $stateName" unless my $state = $$states{$stateName};
353              
354 1367 100       22424 if ($state->transitions)
355 575 100       10908 {if (my $t = $state->transitions->{$symbol}) # Transition on the symbol
356 178         906 {$reachable{$t}++;
357             $reachable{$_}++
358 178   33     208 for @{$$cache{$t} //= statesReachableViaJumps($states, $t)}; # Cache results of this expensive call
  178         579  
359             }
360             }
361 1367         25270 push @check, sort keys $state->jumps->%*; # Make a jump and try again
362             }
363              
364 457         2792 [sort keys %reachable]
365             } # statesReachableViaSymbol
366              
367             sub allTransitions($) # Return all transitions in the NFA specified by B<$states> as {stateName}{symbol} = [reachable states].
368 1     1 1 4 {my ($states) = @_; # States
369 1         5 my $symbols = [$states->symbols]; # Symbols in nfa
370 1         52 my $cache = {}; # Cache results
371              
372 1         5 my $nfaSymbolTransitions;
373 1         9 for my $StateName(sort keys %$states) # Each NFA state
374 12         26 {my $target = $$nfaSymbolTransitions{$StateName} = {};
375 12         18 for my $symbol(@$symbols) # Each NFA symbol
376             {my $statesReachableViaSymbol = sub #P Find the names of all the states that can be reached from a specified state via a specified symbol and all the jumps available.
377 12     12   15 {my %reachable;
378 12         23 my @check = ($StateName);
379 12         17 my %checked;
380              
381 12         33 while(@check) # Reachable from the start state by a single transition after zero or more jumps
382 131         172 {my $stateName = pop @check;
383 131 100       315 next if $checked{$stateName}++;
384 57         80 my $state = $$states{$stateName};
385 57 50       94 confess "No such state: $stateName" unless $state;
386 57         979 my $transitions = $state->transitions;
387              
388 57 100       274 if (defined(my $to = $$transitions{$symbol})) # Transition on the symbol
389 15         28 {$reachable{$to}++;
390             $reachable{$_}++
391 15   66     19 for @{$$cache{$to} //= statesReachableViaJumps($states, $to)}; # Cache results of this expensive call
  15         63  
392             }
393 57 50       918 if (my $jumps = $state->jumps)
394 57         380 {push @check, sort keys %$jumps; # Make a jump and try again
395             }
396             }
397              
398 12         129 [sort keys %reachable] # List of reachable states
399 12         48 }; # statesReachableViaSymbol
400              
401 12         24 $$target{$symbol} = &$statesReachableViaSymbol; # States in the NFA reachable on the symbol
402             }
403             }
404              
405             $nfaSymbolTransitions
406 1         47 } # allTransitions
407              
408             sub parse2($$@) #P Parse an array of symbols
409 561     561 1 1141 {my ($states, $stateName, @symbols) = @_; # States, current state, remaining symbols
410              
411 561 100       9552 if (defined(my $final = $$states{$stateName}->final)) # Return success if we are in a final state with no more symbols to parse
412 120 100       701 {return 1 unless @symbols;
413             }
414              
415 521 100       2804 return 0 unless @symbols; # No more symbols but not in a final state
416              
417 457         943 my ($symbol, @remainder) = @symbols; # Current symbol to parse
418 457         932 my $reachable = statesReachableViaSymbol($states, $stateName, $symbol, {}); # States reachable from the current state via the current symbol
419              
420 457         1027 for my $nextState(@$reachable) # Each state reachable from the current state
421 464         852 {my $result = &parse2($states, $nextState, @remainder); # Try each reachable state
422 464 100       1091 return $result if $result; # Propagate success if a solution was found
423             }
424              
425             undef # No path to a final state found
426 368         855 }
427              
428             sub parse($@) # Parse, using the NFA specified by B<$states>, the list of symbols in L<@symbols>.
429 97     97 1 290 {my ($states, @symbols) = @_; # States, array of symbols
430              
431 97         229 parse2($states, 0, @symbols);
432             }
433              
434             #D0
435             #-------------------------------------------------------------------------------
436             # Export
437             #-------------------------------------------------------------------------------
438              
439 1     1   3452 use Exporter qw(import);
  1         2  
  1         46  
440              
441 1     1   7 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  1         11  
  1         392  
442              
443             @ISA = qw(Exporter);
444             @EXPORT_OK = qw(
445             choice
446             element
447             except
448             oneOrMore optional
449             sequence
450             zeroOrMore
451             );
452             %EXPORT_TAGS = (all=>[@EXPORT, @EXPORT_OK]);
453              
454             # podDocumentation
455              
456             =pod
457              
458             =encoding utf-8
459              
460             =head1 Name
461              
462             Data::NFA - Non deterministic finite state machine from regular expression.
463              
464             =head1 Synopsis
465              
466             Create a non deterministic finite state machine from a regular expression which
467             can then be converted into a deterministic finite state machine by L
468             and used to parse sequences of symbols.
469              
470             For example, the regular expression:
471              
472             ((a|b)*)**4
473              
474             produces the following machine:
475              
476             use Data::NFA qw(:all);
477             use Data::Table::Text qw(:all);
478             use Test::More qw(no_plan);
479              
480             my $N = 4;
481              
482             my $s = q(zeroOrMore(choice(element("a"), element("b"))));
483              
484             my $nfa = eval qq(fromExpr(($s)x$N));
485              
486             ok $nfa->printNws("((a|b)*)**$N: ") eq nws <
487             ((a|b)*)**4:
488             Location F Transitions Jumps
489             0 1 { a => 1 } [2, 4, 6, 8, 10, 12, 14, 16]
490             1 1 undef [0, 2, 3, 4, 6, 8, 10, 12, 14, 16]
491             2 0 { b => 3 } undef
492             3 1 undef [0, 2, 4, 6, 8, 10, 12, 14, 16]
493             4 1 { a => 5 } [6, 8, 10, 12, 14, 16]
494             5 1 undef [4, 6, 7, 8, 10, 12, 14, 16]
495             6 0 { b => 7 } undef
496             7 1 undef [4, 6, 8, 10, 12, 14, 16]
497             8 1 { a => 9 } [10, 12, 14, 16]
498             9 1 undef [8, 10, 11, 12, 14, 16]
499             10 0 { b => 11 } undef
500             11 1 undef [8, 10, 12, 14, 16]
501             12 1 { a => 13 } [14, 16]
502             13 1 undef [12, 14, 15, 16]
503             14 0 { b => 15 } undef
504             15 1 undef [12, 14, 16]
505             16 1 undef undef
506             END
507              
508             =head1 Description
509              
510             Non deterministic finite state machine from regular expression.
511              
512              
513             Version 20200621.
514              
515              
516             The following sections describe the methods in each functional area of this
517             module. For an alphabetic listing of all methods by name see L.
518              
519              
520              
521             =head1 Construct regular expression
522              
523             Construct a regular expression that defines the language to be parsed using the following combining operations which can all be imported:
524              
525             =head2 element($label)
526              
527             One element. An element can also be represented by a string or number
528              
529             Parameter Description
530             1 $label Transition symbol
531              
532             B
533              
534              
535             my $nfa = fromExpr(𝗲𝗹𝗲𝗺𝗲𝗻𝘁("a"));
536             ok $nfa->print("Element: a") eq <
537             Element: a
538             Location F Transitions Jumps
539             0 undef [1]
540             1 { a => 2 } undef
541             2 1 undef undef\
542             END
543             ok $nfa->isFinal(2);
544             ok !$nfa->isFinal(0);
545             ok $nfa->parse(qw(a));
546             ok !$nfa->parse(qw(a b));
547             ok !$nfa->parse(qw(b));
548             ok !$nfa->parse(qw(b a));
549              
550              
551             This is a static method and so should either be imported or invoked as:
552              
553             Data::NFA::element
554              
555              
556             =head2 sequence(@elements)
557              
558             Sequence of elements and/or symbols.
559              
560             Parameter Description
561             1 @elements Elements
562              
563             B
564              
565              
566             my $nfa = fromExpr(qw(a b));
567             is_deeply $nfa->print("ab"), <
568             ab
569             Location F Transitions Jumps
570             0 undef [1]
571             1 { a => 2 } undef
572             2 undef [3]
573             3 { b => 4 } undef
574             4 1 undef undef
575             END
576             ok !$nfa->parse(qw());
577             ok $nfa->parse(qw(a b));
578             ok !$nfa->parse(qw(b a));
579             ok !$nfa->parse(qw(a));
580             ok !$nfa->parse(qw(b));
581              
582              
583             This is a static method and so should either be imported or invoked as:
584              
585             Data::NFA::sequence
586              
587              
588             =head2 optional(@element)
589              
590             An optional sequence of elements and/or symbols.
591              
592             Parameter Description
593             1 @element Elements
594              
595             B
596              
597              
598             my $nfa = fromExpr("a", 𝗼𝗽𝘁𝗶𝗼𝗻𝗮𝗹("b"), "c");
599             is_deeply $nfa->print("ab?c"), <
600             ab?c
601             Location F Transitions Jumps
602             0 undef [1]
603             1 { a => 2 } undef
604             2 undef [3 .. 6]
605             3 undef [4, 5, 6]
606             4 { b => 5 } undef
607             5 undef [6]
608             6 { c => 7 } undef
609             7 1 undef undef
610             END
611             ok !$nfa->parse(qw(a));
612             ok $nfa->parse(qw(a b c));
613             ok $nfa->parse(qw(a c));
614             ok !$nfa->parse(qw(a c b));
615              
616              
617             This is a static method and so should either be imported or invoked as:
618              
619             Data::NFA::optional
620              
621              
622             =head2 zeroOrMore(@element)
623              
624             Zero or more repetitions of a sequence of elements and/or symbols.
625              
626             Parameter Description
627             1 @element Elements
628              
629             B
630              
631              
632             my $nfa = fromExpr("a", 𝘇𝗲𝗿𝗼𝗢𝗿𝗠𝗼𝗿𝗲("b"), "c");
633             is_deeply $nfa->print("ab*c"), <
634             ab*c
635             Location F Transitions Jumps
636             0 undef [1]
637             1 { a => 2 } undef
638             2 undef [3, 4, 6, 7]
639             3 undef [4, 6, 7]
640             4 { b => 5 } undef
641             5 undef [3, 4, 6, 7]
642             6 undef [7]
643             7 { c => 8 } undef
644             8 1 undef undef
645             END
646             ok $nfa->parse(qw(a c));
647             ok $nfa->parse(qw(a b c));
648             ok $nfa->parse(qw(a b b c));
649             ok !$nfa->parse(qw(a b b d));
650              
651             my $nfa = fromExpr("a",
652             𝘇𝗲𝗿𝗼𝗢𝗿𝗠𝗼𝗿𝗲(choice("a",
653             "a")),
654             "a");
655             is_deeply $nfa->print("(a(a|a)*a"), <
656             (a(a|a)*a
657             Location F Transitions Jumps
658             0 undef [1]
659             1 { a => 2 } undef
660             2 undef [3, 4, 5, 7, 8, 10, 11]
661             3 undef [4, 5, 7, 8, 10, 11]
662             4 undef [5, 7, 8]
663             5 { a => 6 } undef
664             6 undef [3, 4, 5, 7 .. 11]
665             7 undef [8]
666             8 { a => 9 } undef
667             9 undef [3, 4, 5, 7, 8, 10, 11]
668             10 undef [11]
669             11 { a => 12 } undef
670             12 1 undef undef
671             END
672              
673             ok !$nfa->parse(qw(a));
674             ok $nfa->parse(qw(a a));
675             ok $nfa->parse(qw(a a a));
676             ok !$nfa->parse(qw(a b a));
677              
678              
679             This is a static method and so should either be imported or invoked as:
680              
681             Data::NFA::zeroOrMore
682              
683              
684             =head2 oneOrMore(@element)
685              
686             One or more repetitions of a sequence of elements and/or symbols.
687              
688             Parameter Description
689             1 @element Elements
690              
691             B
692              
693              
694             my $nfa = fromExpr("a", 𝗼𝗻𝗲𝗢𝗿𝗠𝗼𝗿𝗲("b"), "c");
695              
696             is_deeply $nfa->print("One or More: ab+c"), <
697             One or More: ab+c
698             Location F Transitions Jumps
699             0 undef [1]
700             1 { a => 2 } undef
701             2 undef [3, 4]
702             3 undef [4]
703             4 { b => 5 } undef
704             5 undef [3, 4, 6, 7]
705             6 undef [7]
706             7 { c => 8 } undef
707             8 1 undef undef
708             END
709              
710             ok !$nfa->parse(qw(a c));
711             ok $nfa->parse(qw(a b c));
712             ok $nfa->parse(qw(a b b c));
713             ok !$nfa->parse(qw(a b b d));
714              
715              
716             This is a static method and so should either be imported or invoked as:
717              
718             Data::NFA::oneOrMore
719              
720              
721             =head2 choice(@elements)
722              
723             Choice from amongst one or more elements and/or symbols.
724              
725             Parameter Description
726             1 @elements Elements to be chosen from
727              
728             B
729              
730              
731             my $nfa = fromExpr("a",
732             𝗰𝗵𝗼𝗶𝗰𝗲(qw(b c)),
733             "d");
734             is_deeply $nfa->print("(a(b|c)d"), <
735             (a(b|c)d
736             Location F Transitions Jumps
737             0 undef [1]
738             1 { a => 2 } undef
739             2 undef [3, 4, 6, 7]
740             3 undef [4, 6, 7]
741             4 { b => 5 } undef
742             5 undef [8, 9]
743             6 undef [7]
744             7 { c => 8 } undef
745             8 undef [9]
746             9 { d => 10 } undef
747             10 1 undef undef
748             END
749              
750             ok $nfa->parse(qw(a b d));
751             ok $nfa->parse(qw(a c d));
752             ok !$nfa->parse(qw(a b c d));
753              
754              
755             This is a static method and so should either be imported or invoked as:
756              
757             Data::NFA::choice
758              
759              
760             =head2 except(@elements)
761              
762             Choice from amongst all symbols except the ones mentioned
763              
764             Parameter Description
765             1 @elements Elements not to be chosen from
766              
767             B
768              
769              
770             my $nfa = fromExpr(choice(qw(a b c)), 𝗲𝘅𝗰𝗲𝗽𝘁(qw(c x)), choice(qw(a b c)));
771              
772             is_deeply $nfa->print("(a|b|c)(c!x)(a|b|c)"), <
773             (a|b|c)(c!x)(a|b|c)
774             Location F Transitions Jumps
775             0 undef [1, 2, 4, 5, 7, 8]
776             1 undef [2, 4, 5, 7, 8]
777             2 { a => 3 } undef
778             3 undef [9, 10, 11, 13, 14]
779             4 undef [5]
780             5 { b => 6 } undef
781             6 undef [9, 10, 11, 13, 14]
782             7 undef [8]
783             8 { c => 9 } undef
784             9 undef [10, 11, 13, 14]
785             10 undef [11, 13, 14]
786             11 { a => 12 } undef
787             12 undef [15, 16, 17, 19, 20, 22, 23]
788             13 undef [14]
789             14 { b => 15 } undef
790             15 undef [16, 17, 19, 20, 22, 23]
791             16 undef [17, 19, 20, 22, 23]
792             17 { a => 18 } undef
793             18 1 undef [24]
794             19 undef [20]
795             20 { b => 21 } undef
796             21 1 undef [24]
797             22 undef [23]
798             23 { c => 24 } undef
799             24 1 undef undef
800             END
801              
802             ok !$nfa->parse(qw(a a));
803             ok $nfa->parse(qw(a a a));
804             ok !$nfa->parse(qw(a c a));
805              
806              
807             This is a static method and so should either be imported or invoked as:
808              
809             Data::NFA::except
810              
811              
812             =head1 Non deterministic finite state machine
813              
814             Create a non deterministic finite state machine to represent a regular expression.
815              
816             =head2 fromExpr(@expression)
817              
818             Create an NFA from a regular B<@expression>.
819              
820             Parameter Description
821             1 @expression Regular expressions
822              
823             B
824              
825              
826             my $nfa = 𝗳𝗿𝗼𝗺𝗘𝘅𝗽𝗿
827             ("a",
828             oneOrMore(choice(qw(b c))),
829             optional("d"),
830             element("e")
831             );
832              
833             is_deeply $nfa->print("a(b|c)+d?e"), <
834             a(b|c)+d?e
835             Location F Transitions Jumps
836             0 undef [1]
837             1 { a => 2 } undef
838             2 undef [3, 4, 5, 7, 8]
839             3 undef [4, 5, 7, 8]
840             4 undef [5, 7, 8]
841             5 { b => 6 } undef
842             6 undef [3, 4, 5, 7 .. 14]
843             7 undef [8]
844             8 { c => 9 } undef
845             9 undef [3, 4, 5, 7, 8, 10 .. 14]
846             10 undef [11 .. 14]
847             11 undef [12, 13, 14]
848             12 { d => 13 } undef
849             13 undef [14]
850             14 { e => 15 } undef
851             15 1 undef undef
852             END
853              
854             is_deeply ['a'..'e'], [$nfa->symbols];
855              
856             ok !$nfa->parse(qw(a e));
857             ok !$nfa->parse(qw(a d e));
858             ok $nfa->parse(qw(a b c e));
859             ok $nfa->parse(qw(a b c d e));
860              
861              
862             This is a static method and so should either be imported or invoked as:
863              
864             Data::NFA::fromExpr
865              
866              
867             =head2 print($states, $title)
868              
869             Print the current B<$states> of the non deterministic finite state automaton using the specified B<$title>. If it is non deterministic, the non deterministic jumps will be shown as well as the transitions table. If deterministic, only the transitions table will be shown.
870              
871             Parameter Description
872             1 $states States
873             2 $title Title
874              
875             B
876              
877              
878             my $nfa = fromExpr
879             ("a",
880             oneOrMore(choice(qw(b c))),
881             optional("d"),
882             element("e")
883             );
884              
885             is_deeply $nfa->𝗽𝗿𝗶𝗻𝘁("a(b|c)+d?e"), <
886             a(b|c)+d?e
887             Location F Transitions Jumps
888             0 undef [1]
889             1 { a => 2 } undef
890             2 undef [3, 4, 5, 7, 8]
891             3 undef [4, 5, 7, 8]
892             4 undef [5, 7, 8]
893             5 { b => 6 } undef
894             6 undef [3, 4, 5, 7 .. 14]
895             7 undef [8]
896             8 { c => 9 } undef
897             9 undef [3, 4, 5, 7, 8, 10 .. 14]
898             10 undef [11 .. 14]
899             11 undef [12, 13, 14]
900             12 { d => 13 } undef
901             13 undef [14]
902             14 { e => 15 } undef
903             15 1 undef undef
904             END
905              
906             is_deeply ['a'..'e'], [$nfa->symbols];
907              
908             ok !$nfa->parse(qw(a e));
909             ok !$nfa->parse(qw(a d e));
910             ok $nfa->parse(qw(a b c e));
911             ok $nfa->parse(qw(a b c d e));
912              
913              
914             =head2 symbols($states)
915              
916             Return an array of all the transition symbols.
917              
918             Parameter Description
919             1 $states States
920              
921             B
922              
923              
924             my $nfa = fromExpr
925             ("a",
926             oneOrMore(choice(qw(b c))),
927             optional("d"),
928             element("e")
929             );
930              
931             is_deeply $nfa->print("a(b|c)+d?e"), <
932             a(b|c)+d?e
933             Location F Transitions Jumps
934             0 undef [1]
935             1 { a => 2 } undef
936             2 undef [3, 4, 5, 7, 8]
937             3 undef [4, 5, 7, 8]
938             4 undef [5, 7, 8]
939             5 { b => 6 } undef
940             6 undef [3, 4, 5, 7 .. 14]
941             7 undef [8]
942             8 { c => 9 } undef
943             9 undef [3, 4, 5, 7, 8, 10 .. 14]
944             10 undef [11 .. 14]
945             11 undef [12, 13, 14]
946             12 { d => 13 } undef
947             13 undef [14]
948             14 { e => 15 } undef
949             15 1 undef undef
950             END
951              
952             is_deeply ['a'..'e'], [$nfa->𝘀𝘆𝗺𝗯𝗼𝗹𝘀];
953              
954             ok !$nfa->parse(qw(a e));
955             ok !$nfa->parse(qw(a d e));
956             ok $nfa->parse(qw(a b c e));
957             ok $nfa->parse(qw(a b c d e));
958              
959              
960             =head2 isFinal($states, $state)
961              
962             Whether, in the B<$states> specifying an NFA the named state B<$state> is a final state.
963              
964             Parameter Description
965             1 $states States
966             2 $state Name of state to test
967              
968             B
969              
970              
971             my $nfa = fromExpr(element("a"));
972             ok $nfa->print("Element: a") eq <
973             Element: a
974             Location F Transitions Jumps
975             0 undef [1]
976             1 { a => 2 } undef
977             2 1 undef undef\
978             END
979             ok $nfa->𝗶𝘀𝗙𝗶𝗻𝗮𝗹(2);
980             ok !$nfa->𝗶𝘀𝗙𝗶𝗻𝗮𝗹(0);
981             ok $nfa->parse(qw(a));
982             ok !$nfa->parse(qw(a b));
983             ok !$nfa->parse(qw(b));
984             ok !$nfa->parse(qw(b a));
985              
986              
987             =head2 allTransitions($states)
988              
989             Return all transitions in the NFA specified by B<$states> as {stateName}{symbol} = [reachable states].
990              
991             Parameter Description
992             1 $states States
993              
994             B
995              
996              
997             my $s = q(zeroOrMore(choice("a")));
998              
999             my $nfa = eval qq(fromExpr(sequence($s,$s)));
1000              
1001             is_deeply $nfa->print("a*"), <
1002             a*
1003             Location F Transitions Jumps
1004             0 1 undef [1 .. 4, 6 .. 9, 11]
1005             1 1 undef [2, 3, 4, 6 .. 9, 11]
1006             2 1 undef [3, 4, 6 .. 9, 11]
1007             3 undef [4]
1008             4 { a => 5 } undef
1009             5 1 undef [2, 3, 4, 6 .. 9, 11]
1010             6 1 undef [7, 8, 9, 11]
1011             7 1 undef [8, 9, 11]
1012             8 undef [9]
1013             9 { a => 10 } undef
1014             10 1 undef [7, 8, 9, 11]
1015             11 1 undef undef
1016             END
1017              
1018             ok $nfa->parse(qw());
1019             ok $nfa->parse(qw(a));
1020             ok !$nfa->parse(qw(b));
1021             ok $nfa->parse(qw(a a));
1022             ok !$nfa->parse(qw(b b));
1023             ok !$nfa->parse(qw(a b));
1024             ok !$nfa->parse(qw(b a));
1025             ok !$nfa->parse(qw(c));
1026              
1027             is_deeply $nfa->𝗮𝗹𝗹𝗧𝗿𝗮𝗻𝘀𝗶𝘁𝗶𝗼𝗻𝘀, {
1028             "0" => { a => [10, 11, 2 .. 9] },
1029             "1" => { a => [10, 11, 2 .. 9] },
1030             "2" => { a => [10, 11, 2 .. 9] },
1031             "3" => { a => [11, 2 .. 9] },
1032             "4" => { a => [11, 2 .. 9] },
1033             "5" => { a => [10, 11, 2 .. 9] },
1034             "6" => { a => [10, 11, 7, 8, 9] },
1035             "7" => { a => [10, 11, 7, 8, 9] },
1036             "8" => { a => [10, 11, 7, 8, 9] },
1037             "9" => { a => [10, 11, 7, 8, 9] },
1038             "10" => { a => [10, 11, 7, 8, 9] },
1039             "11" => { a => [] },
1040             };
1041              
1042             is_deeply $nfa->print("a*a* 2"), <
1043             a*a* 2
1044             Location F Transitions Jumps
1045             0 1 undef [1 .. 4, 6 .. 9, 11]
1046             1 1 undef [2, 3, 4, 6 .. 9, 11]
1047             2 1 undef [3, 4, 6 .. 9, 11]
1048             3 undef [4]
1049             4 { a => 5 } undef
1050             5 1 undef [2, 3, 4, 6 .. 9, 11]
1051             6 1 undef [7, 8, 9, 11]
1052             7 1 undef [8, 9, 11]
1053             8 undef [9]
1054             9 { a => 10 } undef
1055             10 1 undef [7, 8, 9, 11]
1056             11 1 undef undef
1057             END
1058              
1059              
1060             =head2 parse($states, @symbols)
1061              
1062             Parse, using the NFA specified by B<$states>, the list of symbols in L<@symbols>.
1063              
1064             Parameter Description
1065             1 $states States
1066             2 @symbols Array of symbols
1067              
1068             B
1069              
1070              
1071             my $nfa = fromExpr(element("a"));
1072             ok $nfa->print("Element: a") eq <
1073             Element: a
1074             Location F Transitions Jumps
1075             0 undef [1]
1076             1 { a => 2 } undef
1077             2 1 undef undef\
1078             END
1079             ok $nfa->isFinal(2);
1080             ok !$nfa->isFinal(0);
1081             ok $nfa->𝗽𝗮𝗿𝘀𝗲(qw(a));
1082             ok !$nfa->𝗽𝗮𝗿𝘀𝗲(qw(a b));
1083             ok !$nfa->𝗽𝗮𝗿𝘀𝗲(qw(b));
1084             ok !$nfa->𝗽𝗮𝗿𝘀𝗲(qw(b a));
1085              
1086              
1087              
1088             =head2 Data::NFA::State Definition
1089              
1090              
1091             NFA State
1092              
1093              
1094              
1095              
1096             =head3 Output fields
1097              
1098              
1099             B - Whether this state is final
1100              
1101             B - {to => 1} : jumps from this state not consuming any input symbols
1102              
1103             B - {symbol => state} : transitions from this state consuming one input symbol
1104              
1105              
1106              
1107             =head1 Private Methods
1108              
1109             =head2 newNfa(%options)
1110              
1111             Create a new NFA
1112              
1113             Parameter Description
1114             1 %options Options
1115              
1116             =head2 newNfaState(%options)
1117              
1118             Create a new NFA state.
1119              
1120             Parameter Description
1121             1 %options Options
1122              
1123             =head2 addNewState($nfa)
1124              
1125             Create a new NFA state and add it to an NFA created with L.
1126              
1127             Parameter Description
1128             1 $nfa Nfa
1129              
1130             =head2 fromExpr2($states, $expr, $symbols)
1131              
1132             Create an NFA from a regular expression.
1133              
1134             Parameter Description
1135             1 $states States
1136             2 $expr Regular expression constructed from L L L L L L
1137             3 $symbols Set of symbols used by the NFA.
1138              
1139             =head2 propagateFinalState($states)
1140              
1141             Mark the B<$states> that can reach the final state with a jump as final.
1142              
1143             Parameter Description
1144             1 $states States
1145              
1146             =head2 statesReachableViaJumps($states, $StateName)
1147              
1148             Find the names of all the B<$states> that can be reached from a specified B<$stateName> via jumps alone.
1149              
1150             Parameter Description
1151             1 $states States
1152             2 $StateName Name of start state
1153              
1154             =head2 removeEmptyFields($states)
1155              
1156             Remove empty fields from the B representing an NFA.
1157              
1158             Parameter Description
1159             1 $states States
1160              
1161             =head2 printFinalState($state)
1162              
1163             Print the final field of the specified B<$state>.
1164              
1165             Parameter Description
1166             1 $state State
1167              
1168             =head2 printWithJumps($states, $title)
1169              
1170             Print the current B<$states> of an NFA with jumps using the specvified B<$title>.
1171              
1172             Parameter Description
1173             1 $states States
1174             2 $title Optional title
1175              
1176             =head2 printWithOutJumps($states, $title)
1177              
1178             Print the current B<$states> of an NFA without jumps using the specified B<$title>.
1179              
1180             Parameter Description
1181             1 $states States
1182             2 $title Title.
1183              
1184             =head2 statesReachableViaSymbol($states, $StateName, $symbol, $cache)
1185              
1186             Find the names of all the states that can be reached from a specified state via a specified symbol and all the jumps available.
1187              
1188             Parameter Description
1189             1 $states States
1190             2 $StateName Name of start state
1191             3 $symbol Symbol to reach on
1192             4 $cache A hash to be used as a cache
1193              
1194             =head2 parse2($states, $stateName, @symbols)
1195              
1196             Parse an array of symbols
1197              
1198             Parameter Description
1199             1 $states States
1200             2 $stateName Current state
1201             3 @symbols Remaining symbols
1202              
1203              
1204             =head1 Index
1205              
1206              
1207             1 L - Create a new NFA state and add it to an NFA created with L.
1208              
1209             2 L - Return all transitions in the NFA specified by B<$states> as {stateName}{symbol} = [reachable states].
1210              
1211             3 L - Choice from amongst one or more elements and/or symbols.
1212              
1213             4 L - One element.
1214              
1215             5 L - Choice from amongst all symbols except the ones mentioned
1216              
1217             6 L - Create an NFA from a regular B<@expression>.
1218              
1219             7 L - Create an NFA from a regular expression.
1220              
1221             8 L - Whether, in the B<$states> specifying an NFA the named state B<$state> is a final state.
1222              
1223             9 L - Create a new NFA
1224              
1225             10 L - Create a new NFA state.
1226              
1227             11 L - One or more repetitions of a sequence of elements and/or symbols.
1228              
1229             12 L - An optional sequence of elements and/or symbols.
1230              
1231             13 L - Parse, using the NFA specified by B<$states>, the list of symbols in L<@symbols>.
1232              
1233             14 L - Parse an array of symbols
1234              
1235             15 L - Print the current B<$states> of the non deterministic finite state automaton using the specified B<$title>.
1236              
1237             16 L - Print the final field of the specified B<$state>.
1238              
1239             17 L - Print the current B<$states> of an NFA with jumps using the specvified B<$title>.
1240              
1241             18 L - Print the current B<$states> of an NFA without jumps using the specified B<$title>.
1242              
1243             19 L - Mark the B<$states> that can reach the final state with a jump as final.
1244              
1245             20 L - Remove empty fields from the B representing an NFA.
1246              
1247             21 L - Sequence of elements and/or symbols.
1248              
1249             22 L - Find the names of all the B<$states> that can be reached from a specified B<$stateName> via jumps alone.
1250              
1251             23 L - Find the names of all the states that can be reached from a specified state via a specified symbol and all the jumps available.
1252              
1253             24 L - Return an array of all the transition symbols.
1254              
1255             25 L - Zero or more repetitions of a sequence of elements and/or symbols.
1256              
1257             =head1 Installation
1258              
1259             This module is written in 100% Pure Perl and, thus, it is easy to read,
1260             comprehend, use, modify and install via B:
1261              
1262             sudo cpan install Data::NFA
1263              
1264             =head1 Author
1265              
1266             L
1267              
1268             L
1269              
1270             =head1 Copyright
1271              
1272             Copyright (c) 2016-2019 Philip R Brenan.
1273              
1274             This module is free software. It may be used, redistributed and/or modified
1275             under the same terms as Perl itself.
1276              
1277             =cut
1278              
1279              
1280              
1281             # Tests and documentation
1282              
1283             sub test
1284 1     1 0 14 {my $p = __PACKAGE__;
1285 1         10 binmode($_, ":utf8") for *STDOUT, *STDERR;
1286 1 50       62 return if eval "eof(${p}::DATA)";
1287 1         53 my $s = eval "join('', <${p}::DATA>)";
1288 1 50       8 $@ and die $@;
1289 1     1   7 eval $s;
  1     1   2  
  1     1   38  
  1         6  
  1         2  
  1         24  
  1         601  
  1         66102  
  1         12  
  1         80  
1290 1 50       799 $@ and die $@;
1291 1         133 1
1292             }
1293              
1294             test unless caller;
1295              
1296             1;
1297             # podDocumentation
1298             __DATA__