File Coverage

blib/lib/Filter/Heredoc.pm
Criterion Covered Total %
statement 190 199 95.4
branch 68 78 87.1
condition 5 9 55.5
subroutine 20 20 100.0
pod 3 3 100.0
total 286 309 92.5


line stmt bran cond sub pod time code
1             package Filter::Heredoc;
2              
3 24     24   1378611 use 5.010;
  24         265  
4 24     24   97 use strict;
  24         39  
  24         385  
5 24     24   100 use warnings;
  24         33  
  24         1028  
6              
7             our $VERSION = '0.05';
8              
9             =head1 NAME
10              
11             Filter::Heredoc - Search and filter embedded here documents
12              
13             =head1 VERSION
14              
15             Version 0.05
16              
17             =cut
18              
19 24     24   152 use base qw(Exporter);
  24         69  
  24         3324  
20 24     24   139 use feature 'state';
  24         35  
  24         2654  
21              
22 24     24   123 use Carp;
  24         30  
  24         1287  
23 24     24   7870 use Filter::Heredoc::Rule qw ( _hd_is_rules_ok_line ); # intra sub #
  24         49  
  24         44860  
24              
25             # private subroutines only used in author tests
26             our @EXPORT_OK = qw (
27             hd_init
28             hd_getstate
29             hd_labels
30             _is_comment
31             _state
32             _strip_quotes
33             _infifo
34             _is_ingress
35             _is_egress
36             _strip_tabdelimiter
37             _infifotab
38             _strip_trailing_pipe
39             @CARP_UNDEF
40             @CARP_EGRESS
41             @CARP_INGRESS
42             );
43              
44             # our thrown exceptions. What's wrong, and why it's wrong.
45             our @CARP_UNDEF = (
46             "\nPassed argument to function is undef",
47             "\nCan't determine state from an undef argument",
48             "\n"
49             );
50             our @CARP_EGRESS = (
51             "\nCurrent state is Egress, and passed line say we shall change to Egress again",
52             "\nNot allowed change i.e. Egress --> Egress",
53             "\n"
54             );
55             our @CARP_INGRESS = (
56             "\nCurrent state is Ingress, and passed line say we shall change to Ingress again",
57             "\nNot allowed change i.e. Ingress --> Ingress",
58             "\n"
59             );
60              
61             ### Export_ok subroutines starts here ###
62              
63             ### INTERFACE SUBROUTINE ###
64             # Usage : hd_getline ( $line)
65             # Purpose : Main routine to determine state changes based on the
66             # previous (existing state) and the $line (argument).
67             # Returns : Hash with state labels indicating the new state
68             # Throws : Yes, see above @CARP-globals
69              
70             sub hd_getstate {
71 557     557 1 264312 my $EMPTY_STR = q{};
72 557         854 my $line = shift;
73 557         921 my %marker = hd_labels();
74 557         860 my @parselineitems;
75 557         709 my $COPYOUTFROMFIFO = 1;
76              
77 557         1183 my %state = (
78             statemarker => $EMPTY_STR,
79             blockdelimiter => $EMPTY_STR,
80             is_tabremoveflag => $EMPTY_STR,
81             );
82              
83             # Argument (the text line) can not be undef
84 557 50       975 if ( !defined($line) ) {
85 0         0 Carp::confess(@CARP_UNDEF); # trap with eval otherwise die
86             }
87              
88 557         793 chomp $line;
89              
90             =for StateTests:
91             The $line is either the ingress- or egress text line, were the state
92             flag needs to toggle, or this is either another full text line of source
93             or here document were nothing change if last one was the same.
94             The initial state is not important for the start.
95              
96             =cut
97              
98             ###############################################################
99             ### State change tests (source --> source, source -> ingress)
100             ###############################################################
101              
102             # Test if last state was in 'source'
103 557 100       1209 if ( _state() eq $marker{source} ) {
104              
105             # Test change to 'heredoc' with basic assumption on match for '<<'
106 307 100       484 if ( _is_ingress($line) ) {
107              
108             # Bugfix DBNX#13
109 48         180 $line =~ s/\s+$//; # remove trailing white spaces before split()
110              
111             # endfix
112              
113             # Each shell ingress text line may contain multiple delimiters
114 48         135 @parselineitems = split /;/, $line;
115              
116             # Process each delimiter (split by ';')
117 48         153 while ( defined( my $tmpdelim = shift @parselineitems ) ) {
118              
119             # Ensure that any parsed sub-lines is not an inline comment
120 59 100       99 if ( _is_comment($tmpdelim) ) {
121 4         10 next;
122             }
123              
124             # Bugfix DBNX#11 remove the trailing pipe '|', and any cmd behind
125             # it, if present. Applies to 'cat <
126 55         106 $tmpdelim = _strip_trailing_pipe($tmpdelim);
127              
128             # endfix
129              
130             # Extract the delimiter under POSIX assumptions
131 55         87 my $subdelimiter = $EMPTY_STR;
132 55         74 my $final_delimiter = $EMPTY_STR;
133 55         108 $subdelimiter = _get_posix_delimiter($tmpdelim);
134              
135             # The saved delimiter can not contain '-' if line was '<<-EOF'
136 55         133 $final_delimiter = _strip_tabdelimiter($subdelimiter);
137              
138             # Set the tab delimiter flag for processing by caller
139 55 100       142 if ( $final_delimiter ne $subdelimiter ) {
140 7         29 _infifotab(1); # insert tab removal true flag
141             }
142             else {
143 48         154 _infifotab($EMPTY_STR); # no tab removal condition
144             }
145              
146             # Save target 'terminator' to identify egress condition
147 55         118 _infifo($final_delimiter);
148             }
149              
150             # Update state
151 48         197 _state( $marker{ingress} );
152              
153             # Only heredoc/egress lines are applicable for tab removal flag
154             %state = (
155             statemarker => $marker{ingress},
156 48         289 is_tabremoveflag => $EMPTY_STR,
157             blockdelimiter => $EMPTY_STR, # ingress is not a here-doc
158             );
159 48         399 return %state; # Ingress - all delimiters processed on the line
160              
161             } # end if-ingress
162              
163             # prepare state hash with no state change from source
164 259         522 _state( $marker{source} );
165             %state = (
166             statemarker => $marker{source},
167 259         474 is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
168             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
169             );
170 259         1121 return %state; #source
171              
172             } # end if-source
173              
174             ###############################################################
175             ### State change tests (ingress --> heredoc), and
176             ### non valid state change (ingress --> ingress)
177             ###############################################################
178              
179             # Test if last state was in 'ingress'
180 250 100       409 if ( _state() eq $marker{ingress} ) {
181 48 50       92 if ( !_is_ingress($line) ) {
182              
183 48         110 _state( $marker{heredoc} );
184             %state = (
185             statemarker => $marker{heredoc},
186 48         116 blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
187             is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
188             );
189 48         244 return %state; # heredoc
190              
191             # Throw an exception with full backtrace, including above error message!
192             }
193             else {
194 0         0 Carp::confess(@CARP_INGRESS); # trap with eval otherwise die
195 0         0 return;
196             }
197              
198             } # end if-ingress
199              
200             ###############################################################
201             ### State change tests (heredoc --> heredoc, heredoc -> egress)
202             ###############################################################
203              
204             # Test if last state was in 'heredoc'
205 202 100       305 if ( _state() eq $marker{heredoc} ) {
206              
207 148 100       233 if ( _is_egress($line) ) {
208              
209             # Prepare state hash and change state from heredoc
210 54         159 _state( $marker{egress} );
211             %state = (
212             statemarker => $marker{egress},
213 54         115 blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
214             is_tabremoveflag => _infifotab(), # removes the tab flag
215             );
216 54         134 _infifo(); # removes the delimiter from the fifo array
217              
218 54         276 return %state; # egress
219              
220             } # end if-egress
221              
222             # Prepare state hash with no state change from heredoc
223 94         202 _state( $marker{heredoc} );
224             %state = (
225             statemarker => $marker{heredoc},
226 94         180 is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
227             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
228             );
229              
230 94         454 return %state; #heredoc
231              
232             } # end if-heredoc
233              
234             ###############################################################
235             ### State change tests (egress --> source, egress --> heredoc)
236             ### and test for non valid state change (egress --> egress)
237             ###############################################################
238              
239             # Test if last state was in 'egress'
240 54 50       134 if ( _state() eq $marker{egress} ) {
241              
242 54         107 my $fifolength = length( _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ) );
243              
244             # Infifo terminator doesn't contains any delimiters, change to source
245 54 100       121 if ( $fifolength == 0 ) {
246              
247 47         136 _state( $marker{source} );
248             %state = (
249             statemarker => $marker{source},
250 47         104 is_tabremoveflag => $EMPTY_STR,
251             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
252             );
253 47         230 return %state; #source
254             }
255              
256 7 50 33     26 if ( ( $fifolength != 0 ) && ( _is_egress($line) ) ) {
257              
258             # Unexpected direct egress line again
259 0         0 Carp::confess(@CARP_EGRESS); # trap with eval otherwise die
260 0         0 return;
261             }
262             else {
263              
264             # Terminator array does not match - change state back to heredoc
265 7         18 _state( $marker{heredoc} );
266             %state = (
267             statemarker => $marker{heredoc},
268 7         19 blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
269             is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
270             );
271              
272 7         36 return %state; #heredoc
273              
274             }
275              
276             } # end if-egress
277              
278             }
279             ### INTERFACE SUBROUTINE ###
280             # Usage : hd_labels() or hd_labels( %newlabels )
281             # Purpose : Subroutine to get/set state labels.
282             # default labels are 'S', 'I', 'H' and 'E'.
283             # (i.e Source, Ingress, Heredoc, or Egress)
284             # Returns : Hash with the definition of labels for each state
285             # Throws : No
286              
287             sub hd_labels {
288 2184     2184 1 4828 my %arg = @_;
289 2184         2260 my %marker;
290              
291 2184 100       3952 $arg{source} = q{S} unless exists $arg{source};
292 2184 100       3390 $arg{ingress} = q{I} unless exists $arg{ingress};
293 2184 100       3207 $arg{heredoc} = q{H} unless exists $arg{heredoc};
294 2184 100       3231 $arg{egress} = q{E} unless exists $arg{egress};
295              
296 2184         2288 state $source = $arg{source};
297 2184         2120 state $ingress = $arg{ingress};
298 2184         2124 state $heredoc = $arg{heredoc};
299 2184         2180 state $egress = $arg{egress};
300              
301 2184         8177 return %marker = (
302             source => $source,
303             ingress => $ingress,
304             heredoc => $heredoc,
305             egress => $egress,
306             );
307             }
308              
309             ### INTERFACE SUBROUTINE ###
310             # Usage : hd_init()
311             # Purpose : Empties the terminator and tab arrays and set the internal
312             # state to source. Used after each file processed in case of
313             # the ingress/egress conditions are not found properly.
314             # Default labels are 'S', 'I', 'H' and 'E'.
315             # (i.e Source, Ingress, Heredoc, or Egress)
316             # Returns : $EMPTY_STR
317             # Throws : No
318              
319             sub hd_init {
320 1     1 1 927 my %marker = hd_labels(); # get default markers
321 1         2 my $initstate = $marker{source}; # default initial state
322 1         2 my $EMPTY_STR = q{};
323              
324             # Set the state to source
325 1         2 _state($initstate);
326              
327             # Empty the terminator array
328             FIFOLOOP:
329 1         2 while ( _infifo() ) {
330 1         3 next FIFOLOOP;
331             }
332              
333             # empty the tab array
334             TABLOOP:
335 1         2 while ( _infifotab() ) {
336 1         2 next TABLOOP;
337             }
338              
339 1         3 return $EMPTY_STR;
340             }
341              
342              
343             ### The Module private subroutines starts here ###
344              
345             ### INTERNAL UTILITY ###
346             # Usage : _is_comment( $line )
347             # Purpose : Prevent a false ingress condition if line is a comment.
348             # Returns : True (1) or False ($EMPTY_STR)
349             # Throws : No
350              
351             sub _is_comment {
352 414     414   460 my $EMPTY_STR = q{};
353 414         453 my $line;
354              
355 414 50       778 if ( !defined( $line = shift ) ) {
356 0         0 return $EMPTY_STR;
357             }
358              
359             # If only white space left of the '#' its a comment.
360 414         680 $line =~ tr/ \t\n\r\f//d;
361              
362             # Test first character for '#', i.e. index() return 0.
363 414 100       910 if ( index( $line, '#' ) == 0 ) {
364 86         190 return 1;
365             }
366              
367 328         612 return $EMPTY_STR; # It's not a comment
368             }
369              
370             ### INTERNAL UTILITY ###
371             # Usage : _is_ingress( $line )
372             # Purpose : Determine if line is an ingress line (regex /<
373             # Returns : True (1) or False ($EMPTY_STR)
374             # Throws : No
375              
376             sub _is_ingress {
377 355     355   452 my $line = shift;
378 355         406 my $EMPTY_STR = q{};
379              
380 355 100       475 if ( !_is_comment($line) ) {
381              
382 273 100       645 if ( $line =~ m/<
383              
384             ## Prevent false positives (Filter::Heredoc::Rule) ##
385 52 100       164 if ( !_hd_is_rules_ok_line($line) ) {
386 4         10 return $EMPTY_STR; # FALSE, not an ingress line
387             }
388              
389 48         109 return 1; # TRUE
390             }
391             }
392 303         608 return $EMPTY_STR; # FALSE
393             }
394              
395             ### INTERNAL UTILITY ###
396             # Usage : _is_egress( $line )
397             # Purpose : Determine if line is an egress line
398             # Returns : True (1) or False ($EMPTY_STR)
399             # Throws : No
400              
401             sub _is_egress {
402 155     155   214 my $line = shift;
403 155         175 my $EMPTY_STR = q{};
404 155         181 my $nextoutdelimiter = $EMPTY_STR;
405 155         171 my $COPYOUTFROMFIFO = 1;
406              
407             =for EgressNotes:
408             To be a valid delimter, first word in line must match next infifo terminator.
409             split() defaults to split on ' ' and on $_ (and this is not same as //!)
410             Currently no rule helper is used on the egress delimiter.
411             Removes all trailing white space (and if no word, all is removed)
412              
413             =cut
414              
415 155         233 $_ = $line;
416 155         384 my @linefield = split;
417              
418             # Check what is waiting (do not remove) from fifo of delimiters
419 155         241 $nextoutdelimiter = _infifo( $EMPTY_STR, $COPYOUTFROMFIFO );
420              
421             # Stop processing, no delimiters in fifo
422 155 50       291 if ( $nextoutdelimiter eq $EMPTY_STR ) {
423 0         0 return $EMPTY_STR;
424             }
425              
426             # Line is undef for lines with white space
427 155 100       357 if ( !defined( $linefield[0] ) ) {
    100          
428 17         36 return $EMPTY_STR; # FALSE
429             }
430             elsif ( $nextoutdelimiter eq $linefield[0] ) {
431 54         131 return 1; # TRUE
432             }
433              
434 84         204 return $EMPTY_STR; # FALSE
435             }
436              
437             ### INTERNAL UTILITY ###
438             # Usage : _get_posix_delimiter( $line )
439             # Purpose : Extracts the delimiter and assumes POSIX i.e. white
440             # space is not significant between '<<' and 'delimiter'.
441             # Returns : The delimiter itself (includes '-' if << -EOT).
442             # Throws : No
443              
444             sub _get_posix_delimiter {
445 55     55   76 my $tmpdelim = shift;
446 55         74 my $EMPTY_STR = q{};
447 55         68 my $subdelimiter = $EMPTY_STR;
448              
449             # Remove all quote characters and get the delimiter itself
450 55         236 $tmpdelim =~ s/\s+//g; # removes all white space (becomes one word)
451 55         128 $tmpdelim = _strip_quotes($tmpdelim); # removes any [ " ' \ ]
452 55         194 $tmpdelim =~ m/<{2}(.*)/;
453 55         145 $subdelimiter = $1;
454              
455 55         98 return $subdelimiter;
456             }
457              
458             ### INTERNAL UTILITY ###
459             # Usage : _state() or _state( q{E} )
460             # Purpose : Subroutine to get/set the persistent state.
461             # Returns : The state (label) of the state machine when called.
462             # Throws : No
463              
464             sub _state {
465 1622     1622   2021 my %marker = hd_labels();
466 1622         2097 state $linestate = $marker{source}; # default initial state
467 1622         1809 my $newstate = shift;
468              
469             # Set or get the new state
470 1622 100       2435 $linestate = $newstate if defined $newstate;
471              
472 1622         3168 return $linestate;
473             }
474              
475             ### INTERNAL UTILITY ###
476             # Usage : _strip_quotes( $line )
477             # Purpose : Before a delimiter is ready to be saved, quotes shall
478             # first be removed.
479             # Returns : String without any quotes or escapes character i.e. [" ' \ ].
480             # Throws : No
481              
482             sub _strip_quotes {
483 55     55   86 my $tmpstr = shift;
484 55         64 my $noquotesstr;
485              
486 55         97 $tmpstr =~ tr/\\//d; # remove all [\];
487 55         67 $tmpstr =~ tr/"//d; # remove all ["];
488 55         95 $tmpstr =~ tr/'//d; # remove all ['];
489              
490 55         93 $noquotesstr = $tmpstr;
491              
492 55         98 return $noquotesstr;
493             }
494              
495             ### INTERNAL UTILITY ###
496             # Usage : _strip_tabdelimiter( $line )
497             # Purpose : Removes the tab-delimiter '-' after '<<' if present.
498             # Returns : String without '-' or the original string not present.
499             # Throws : No
500              
501             sub _strip_tabdelimiter {
502 55     55   88 my $line = shift;
503              
504             # Get the string after '-'
505 55 100       210 if ( $line =~ m/^-(.*)/ ) {
506 7         21 return $1;
507             }
508              
509 48         85 return $line; # ..otherwise return the original string
510             }
511              
512             ### INTERNAL UTILITY ###
513             # Usage : _infifo( $line ), _infifo(), _infifo( $EMPTY_STR, 1 )
514             # Purpose : Accessor routine to insert/extract delimiter from fifo array.
515             # When extracting, the delimiter is fully removed from array.
516             # The last syntax looks for next delimiter without removing it.
517             # Returns : Returns the delimiter or an $EMPTY_STR when no delimiters exists.
518             # Throws : No
519              
520             sub _infifo {
521 829     829   971 my $EMPTY_STR = q{};
522 829         967 my $delimiter = shift;
523 829   66     1360 my $copyoutfromfifo = shift || $EMPTY_STR; # default FALSE
524 829         892 my $nextelementout;
525              
526             # Holds the egress terminator(s) at any given time
527 829         796 state @terminators;
528              
529             # Test that its not the pre-view mode
530 829 100       1126 if ( !$copyoutfromfifo ) {
531              
532             # Insert the new delimiter in the fifo array
533 111 100       193 if ( defined $delimiter ) {
534 55         96 push @terminators, $delimiter;
535 55         203 return;
536             }
537             else {
538              
539             # Shift out next delimiter
540 56 100       108 if ( defined( my $tmp = shift @terminators ) ) {
541 55         85 return $tmp;
542             }
543             else {
544 1         3 return $EMPTY_STR; # fifo array is empty
545             }
546             }
547             }
548              
549             # Neither insert or extract - pre-view next array element in the array
550             else {
551              
552             # Third mode of syntax, '$copyoutfromfifo' is not-false from above
553 718 50       1310 if ( $delimiter eq $EMPTY_STR ) {
554              
555             # Get one delimiter from the terminator fifo array
556 718 100       1123 if ( defined( $nextelementout = shift @terminators ) ) {
557              
558             # Preserve the fifo array insert the delimiter again
559 365         534 unshift @terminators, $nextelementout;
560 365         869 return $nextelementout;
561             }
562             else {
563 353         895 return $EMPTY_STR;
564             }
565             }
566             }
567              
568             }
569              
570             ### INTERNAL UTILITY ###
571             # Usage : _infifotab( $flag ), _infifotab(), _infifotab( $EMPTY_STR, 1 )
572             # Purpose : Accessor routine to insert/extract true/false from tabfifo array.
573             # When extracting, the value is fully removed from array.
574             # The last syntax looks for next flag value without removing it.
575             # Returns : Returns 1 (true) or an $EMPTY_STR when no flags exists.
576             # Throws : No
577              
578             sub _infifotab {
579 519     519   607 my $EMPTY_STR = q{};
580 519         624 my $istabremoveflag = shift; # this is either $EMPTY_STR, or '1' i.e true
581 519   66     1005 my $copyoutfromfifo = shift || $EMPTY_STR; # default FALSE
582 519         569 my $nextelementout;
583              
584             # Holds tab-removal flags at any given time
585 519         525 state @tabremovals;
586              
587             # Test that its not the pre-view mode
588 519 100       853 if ( !$copyoutfromfifo ) {
589              
590             # Add the new flag value to fifo
591 111 100       182 if ( defined $istabremoveflag ) {
592 55         87 push @tabremovals, $istabremoveflag;
593 55         124 return;
594             }
595             else {
596              
597             # Shift out next flag value
598 56 100       126 if ( defined( my $tmp = shift @tabremovals ) ) {
599 55         162 return $tmp;
600             }
601             else {
602 1         3 return $EMPTY_STR; # fifo array is empty
603             }
604             }
605             }
606              
607             # Neither insert or extract - pre-view next array element in the array
608             else {
609              
610             # Third mode of syntax, '$copyoutfromfifo' is not-false from above
611 408 50       880 if ( $istabremoveflag eq $EMPTY_STR ) {
612              
613             # Get one tab delimiter from the tabremoval fifo array
614 408 100       709 if ( defined( $nextelementout = shift @tabremovals ) ) {
615              
616             # Preserve the fifo array insert the flag again
617 149         208 unshift @tabremovals, $nextelementout;
618 149         361 return $nextelementout;
619             }
620             else {
621 259         603 return $EMPTY_STR;
622             }
623             }
624             }
625              
626             }
627              
628             ### INTERNAL UTILITY ###
629             # Usage : _strip_trailing_pipe( $line )
630             # Purpose : Ingress line characters after a pipe (and an optional shell
631             # command) must be removed to allow extracting the delimiter.
632             # Returns : The line, with everything after the pipe removed incl the pipe
633             # or the line untouched if there is no pipe.
634             # Throws : No
635              
636             sub _strip_trailing_pipe {
637 55     55   82 my $EMPTY_STR = q{};
638 55         98 my $line = shift;
639 55         83 my $newline = $EMPTY_STR;
640              
641 55 50       116 if ( !defined($line) ) {
642 0         0 return $EMPTY_STR;
643             }
644              
645 55         185 my $regexpipe = qr/\|/;
646 55         148 my $regexcapture = qr/^(.*)\|/;
647              
648             # If no pipe return original line
649 55 100       223 if ( $line !~ $regexpipe ) {
650 47         141 return $line;
651             }
652              
653             # Capture everything up to the pipe symbol
654 8 50       42 if ( $line =~ $regexcapture ) {
655 8         20 $newline = $1;
656 8         26 return $newline;
657             }
658              
659 0           return $line; # If match fails returns the original string
660             }
661              
662              
663             =head1 SYNOPSIS
664              
665             use 5.010;
666             use Filter::Heredoc qw( hd_getstate hd_init hd_labels );
667             use Filter::Heredoc::Rule qw( hd_syntax );
668            
669             my $line;
670             my %state;
671            
672             # Get the defined labels to compare with the returned state
673             my %label = hd_labels();
674              
675             # Read a file line-by-line and print only the here document
676             while (defined( $line = )) {
677             %state = hd_getstate( $line );
678             print $line if ( $state{statemarker} eq $label{heredoc} );
679             if ( eof ) {
680             close( ARGV );
681             hd_init(); # Prevent state errors to propagate to next file
682             }
683             }
684              
685             # Test a line (is this an opening delimiter line?)
686             $line = q{cat <
687             %state = hd_getstate( $line );
688             print "$line\n" if ( $state{statemarker} eq $label{ingress} );
689            
690             # Load a syntax helper rule (shell script is built in)
691             hd_syntax ( 'pod' );
692              
693             =head1 DESCRIPTION
694              
695             This is the core module for I. If you're not looking
696             to extend or alter the behavior of this module, you probably want to
697             look at L instead.
698              
699             I provides subroutines to search and print here
700             documents. Here documents (also called "here docs") allow a type of
701             input redirection from some following text. This is often used to embed
702             short text messages (or configuration files) within shell scripts.
703              
704             This module extracts here documents from POSIX IEEE Std 1003.1-2008
705             compliant shell scripts. Perl have derived a similar syntax but is at
706             the same time different in many details.
707              
708             Rules can be added to enhance here document extraction, i.e. prevent
709             "false positives". L exports an additional
710             subroutine to load and unload rules.
711              
712             This version supports a basic C rule. Current subroutines can be
713             tested on Perl scripts if the code constructs use a near POSIX form
714             of here documents. With that said don't rely on the current version
715             for Perl since it's still in a very early phase of development.
716              
717             =head2 Concept to parse here documents.
718              
719             This is a line-by-line state machine design. Reading from the beginning
720             to the end of a script results in following state changes:
721              
722             Source --> Here document --> Source
723            
724             What tells a source line from a here document line apart? Nothing!
725             However if adding an opening and closing delimiter state I tracking
726             previous state we can identify what is source and what's a here document:
727              
728             Source --> Ingress --> Here document --> Egress --> Source
729              
730             In reality there are few more state changes defined by POSIX.
731             An example of this is the script below and with added state labels:
732              
733             S] #!/bin/bash --posix
734             I] cat <
735             H] Hi,
736             E] eof1
737             H] Helene.
738             E] eof2
739             S]
740              
741             Naturally, when bash runs this only the here document is printed:
742              
743             Hi,
744             Helene.
745              
746             =head1 SUBROUTINES
747              
748             I exports following subroutines only on request.
749              
750             hd_getstate # returns a label based on the argument (text line)
751             hd_labels # reads out and (optionally) define new labels
752             hd_init # flushes the internal state machine
753            
754             L exports one subroutine to load and unload
755             syntax rules.
756              
757             hd_syntax # load/unload a script syntax rule
758              
759             =head2 B
760              
761             This routine determines the new state, based on last state C the
762             new text line in the argument.
763              
764             %state = hd_getstate( $line );
765            
766             Returns a hash with following keys/values:
767              
768             statemarker : Holds a label that represent the state of the line.
769            
770             blockdelimiter: Holds the delimiter which belongs to a 'region'.
771            
772             is_tabremovalflag: If the redirector had a trailing minus this
773             value is true for the actual line.
774              
775             A here document 'region' is defined as all here document lines being
776             bracketed by the ingress (opening delimiter) and the egress (terminating
777             delimiter) line. This region may or may not have a file unique delimiter.
778              
779             To prevent unreliable results, only pass a text line as an argument.
780             Use file test operators if reading input lines from a file:
781              
782             if ( -T $file ) {
783             print "$file 'looks' like a plain text file to me.\n";
784             }
785              
786             This function throws exceptions on a few fatal internal errors.
787             These are trappable. See ERRORS below for messages printed.
788              
789             =head2 B
790              
791             Gets or optionally sets a new unique label for the four possible states.
792              
793             %label = hd_labels();
794             %label = hd_labels( %newlabel );
795              
796             The hash keys defines the default internal label assignments.
797              
798             %label = (
799             source => 'S',
800             ingress => 'I',
801             heredoc => 'H',
802             egress => 'E',
803             );
804            
805             Returns a hash with the current label assignment.
806              
807             =head2 B
808              
809             Sets the internal state machine to 'source' and empties all internal
810             state arrays.
811              
812             hd_init();
813              
814             When reading more that one file, call this function before next file to
815             prevent any state faults to propagate to next files input. Now
816             always returns an $EMPTY_STR (q{}) but this may change to indicate an
817             state error from previous files.
818              
819              
820             =head1 ERRORS
821              
822             C throws following exceptions.
823              
824             =over 4
825              
826             =item * B
827              
828             If the text line argument is C following message, including a
829             full trace back, is printed.
830              
831             Passed argument to function is undef.
832             Can't determine state from an undef argument.
833            
834             Ensure that only a plain text line is supplied as an argument.
835              
836             =item * B
837              
838             If the state machine conclude a change was from Ingress to Ingress
839             following message, including a full trace back, is printed:
840              
841             Current state is Ingress, and passed line say we shall change
842             to Ingress again. Not allowed change i.e. Ingress --> Ingress
843            
844             If this happens, please report this as a BUG and how to reproduce.
845              
846             =item * B
847              
848             If the state machine conclude a change was from Egress to Egress
849             following, including a full trace back, message is printed:
850              
851             Current state is Egress, and passed line say we shall change
852             to Egress again. Not allowed change i.e. Egress --> Egress.
853            
854             If this happens, please report this as a BUG and how to reproduce.
855              
856             =back
857              
858             =head1 DEPENDENCIES
859              
860             I only requires Perl 5.10 (or any later version).
861              
862             =head1 AUTHOR
863              
864             Bertil Kronlund, C<< >>
865              
866             =head1 BUGS AND LIMITATIONS
867              
868             I complies with *nix POSIX shells here document syntax.
869             Non-compliant shells on e.g. MSWin32 platform is not supported.
870              
871             Please report any bugs or feature requests to
872             L or at
873             C<< >>.
874              
875             =head1 SEE ALSO
876              
877             Overview of here documents and its usage:
878             L
879              
880             The IEEE Std 1003.1-2008 standards can be found here:
881             L
882              
883             L, L
884              
885             L discuss e.g. how to embed POD as
886             here documents in shell scripts to carry their own documentation.
887              
888             =head1 LICENSE AND COPYRIGHT
889              
890             Copyright 2011-18, Bertil Kronlund
891              
892             This program is free software; you can redistribute it and/or modify it
893             under the terms of either: the GNU General Public License as published
894             by the Free Software Foundation; or the Artistic License.
895              
896             See http://dev.perl.org/licenses/ for more information.
897              
898             =cut
899              
900             1; # End of Filter::Heredoc