File Coverage

blib/lib/Filter/Heredoc.pm
Criterion Covered Total %
statement 191 200 95.5
branch 68 78 87.1
condition 5 9 55.5
subroutine 20 20 100.0
pod 3 3 100.0
total 287 310 92.5


line stmt bran cond sub pod time code
1             package Filter::Heredoc;
2              
3 24     24   849304 use 5.010;
  24         97  
  24         1008  
4 24     24   143 use strict;
  24         238  
  24         902  
5 24     24   128 use warnings;
  24         55  
  24         1648  
6              
7             our $VERSION = '0.02';
8              
9             =head1 NAME
10              
11             Filter::Heredoc - Search and filter embedded here documents
12              
13             =head1 VERSION
14              
15             Version 0.02
16              
17             =cut
18              
19 24     24   132 use base qw(Exporter);
  24         47  
  24         3490  
20 24     24   171 use feature 'state';
  24         61  
  24         3815  
21              
22 24     24   142 use Carp;
  24         47  
  24         2969  
23 24     24   15485 use Filter::Heredoc::Rule qw ( _hd_is_rules_ok_line ); # intra sub #
  24         60  
  24         223936  
24              
25             # private subroutines only used in author tests
26             our @EXPORT_OK = qw (
27             hd_init
28             hd_getstate
29             hd_labels
30             _is_comment
31             _state
32             _strip_quotes
33             _infifo
34             _is_ingress
35             _is_egress
36             _strip_tabdelimiter
37             _infifotab
38             _strip_trailing_pipe
39             @CARP_UNDEF
40             @CARP_EGRESS
41             @CARP_INGRESS
42             );
43              
44             # our thrown exceptions. What's wrong, and why it's wrong.
45             our @CARP_UNDEF = (
46             "\nPassed argument to function is undef",
47             "\nCan't determine state from an undef argument",
48             "\n"
49             );
50             our @CARP_EGRESS = (
51             "\nCurrent state is Egress, and passed line say we shall change to Egress again",
52             "\nNot allowed change i.e. Egress --> Egress",
53             "\n"
54             );
55             our @CARP_INGRESS = (
56             "\nCurrent state is Ingress, and passed line say we shall change to Ingress again",
57             "\nNot allowed change i.e. Ingress --> Ingress",
58             "\n"
59             );
60              
61             ### Export_ok subroutines starts here ###
62              
63             ### INTERFACE SUBROUTINE ###
64             # Usage : hd_getline ( $line)
65             # Purpose : Main routine to determine state changes based on the
66             # previous (existing state) and the $line (argument).
67             # Returns : Hash with state labels indicating the new state
68             # Throws : Yes, see above @CARP-globals
69              
70             sub hd_getstate {
71 557     557 1 358108 my $EMPTY_STR = q{};
72 557         910 my $line = shift;
73 557         1135 my %marker = hd_labels();
74 557         848 my @parselineitems;
75 557         2419 my $COPYOUTFROMFIFO = 1;
76              
77 557         1832 my %state = (
78             statemarker => $EMPTY_STR,
79             blockdelimiter => $EMPTY_STR,
80             is_tabremoveflag => $EMPTY_STR,
81             );
82              
83             # Argument (the text line) can not be undef
84 557 50       1611 if ( !defined($line) ) {
85 0         0 Carp::confess(@CARP_UNDEF); # trap with eval otherwise die
86             }
87              
88 557         886 chomp $line;
89              
90             =for StateTests:
91             The $line is either the ingress- or egress text line, were the state
92             flag needs to toggle, or this is either another full text line of source
93             or here document were nothing change if last one was the same.
94             The initial state is not important for the start.
95              
96             =cut
97              
98             ###############################################################
99             ### State change tests (source --> source, source -> ingress)
100             ###############################################################
101              
102             # Test if last state was in 'source'
103 557 100       1219 if ( _state() eq $marker{source} ) {
104              
105             # Test change to 'heredoc' with basic assumption on match for '<<'
106 307 100       955 if ( _is_ingress($line) ) {
107              
108             # Bugfix DBNX#13
109 48         607 $line =~ s/\s+$//; # remove trailing white spaces before split()
110              
111             # endfix
112              
113             # Each shell ingress text line may contain multiple delimiters
114 48         184 @parselineitems = split /;/, $line;
115              
116             # Process each delimiter (split by ';')
117 48         401 while ( defined( my $tmpdelim = shift @parselineitems ) ) {
118              
119             # Ensure that any parsed sub-lines is not an inline comment
120 59 100       134 if ( _is_comment($tmpdelim) ) {
121 4         16 next;
122             }
123              
124             # Bugfix DBNX#11 remove the trailing pipe '|', and any cmd behind
125             # it, if present. Applies to 'cat <
126 55         147 $tmpdelim = _strip_trailing_pipe($tmpdelim);
127              
128             # endfix
129              
130             # Extract the delimiter under POSIX assumptions
131 55         101 my $subdelimiter = $EMPTY_STR;
132 55         78 my $final_delimiter = $EMPTY_STR;
133 55         149 $subdelimiter = _get_posix_delimiter($tmpdelim);
134              
135             # The saved delimiter can not contain '-' if line was '<<-EOF'
136 55         268 $final_delimiter = _strip_tabdelimiter($subdelimiter);
137              
138             # Set the tab delimiter flag for processing by caller
139 55 100       146 if ( $final_delimiter ne $subdelimiter ) {
140 7         24 _infifotab(1); # insert tab removal true flag
141             }
142             else {
143 48         101 _infifotab($EMPTY_STR); # no tab removal condition
144             }
145              
146             # Save target 'terminator' to identify egress condition
147 55         124 _infifo($final_delimiter);
148             }
149              
150             # Update state
151 48         184 _state( $marker{ingress} );
152              
153             # Only heredoc/egress lines are applicable for tab removal flag
154 48         205 %state = (
155             statemarker => $marker{ingress},
156             is_tabremoveflag => $EMPTY_STR,
157             blockdelimiter => $EMPTY_STR, # ingress is not a here-doc
158             );
159 48         451 return %state; # Ingress - all delimiters processed on the line
160              
161             } # end if-ingress
162              
163             # prepare state hash with no state change from source
164 259         562 _state( $marker{source} );
165 259         695 %state = (
166             statemarker => $marker{source},
167             is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
168             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
169             );
170 259         2093 return %state; #source
171              
172             } # end if-source
173              
174             ###############################################################
175             ### State change tests (ingress --> heredoc), and
176             ### non valid state change (ingress --> ingress)
177             ###############################################################
178              
179             # Test if last state was in 'ingress'
180 250 100       458 if ( _state() eq $marker{ingress} ) {
181 48 50       111 if ( !_is_ingress($line) ) {
182              
183 48         128 _state( $marker{heredoc} );
184 48         144 %state = (
185             statemarker => $marker{heredoc},
186             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
187             is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
188             );
189 48         446 return %state; # heredoc
190              
191             # Throw an exception with full backtrace, including above error message!
192             }
193             else {
194 0         0 Carp::confess(@CARP_INGRESS); # trap with eval otherwise die
195 0         0 return;
196             }
197              
198             } # end if-ingress
199              
200             ###############################################################
201             ### State change tests (heredoc --> heredoc, heredoc -> egress)
202             ###############################################################
203              
204             # Test if last state was in 'heredoc'
205 202 100       455 if ( _state() eq $marker{heredoc} ) {
206              
207 148 100       282 if ( _is_egress($line) ) {
208              
209             # Prepare state hash and change state from heredoc
210 54         372 _state( $marker{egress} );
211 54         216 %state = (
212             statemarker => $marker{egress},
213             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
214             is_tabremoveflag => _infifotab(), # removes the tab flag
215             );
216 54         145 _infifo(); # removes the delimiter from the fifo array
217              
218 54         520 return %state; # egress
219              
220             } # end if-egress
221              
222             # Prepare state hash with no state change from heredoc
223 94         231 _state( $marker{heredoc} );
224 94         234 %state = (
225             statemarker => $marker{heredoc},
226             is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
227             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
228             );
229              
230 94         825 return %state; #heredoc
231              
232             } # end if-heredoc
233              
234             ###############################################################
235             ### State change tests (egress --> source, egress --> heredoc)
236             ### and test for non valid state change (egress --> egress)
237             ###############################################################
238              
239             # Test if last state was in 'egress'
240 54 50       119 if ( _state() eq $marker{egress} ) {
241              
242 54         124 my $fifolength = length( _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ) );
243              
244             # Infifo terminator doesn't contains any delimiters, change to source
245 54 100       176 if ( $fifolength == 0 ) {
246              
247 47         144 _state( $marker{source} );
248 47         146 %state = (
249             statemarker => $marker{source},
250             is_tabremoveflag => $EMPTY_STR,
251             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
252             );
253 47         485 return %state; #source
254             }
255              
256 7 50 33     37 if ( ( $fifolength != 0 ) && ( _is_egress($line) ) ) {
257              
258             # Unexpected direct egress line again
259 0         0 Carp::confess(@CARP_EGRESS); # trap with eval otherwise die
260 0         0 return;
261             }
262             else {
263              
264             # Terminator array does not match - change state back to heredoc
265 7         20 _state( $marker{heredoc} );
266 7         24 %state = (
267             statemarker => $marker{heredoc},
268             blockdelimiter => _infifo( $EMPTY_STR, $COPYOUTFROMFIFO ),
269             is_tabremoveflag => _infifotab( q{}, $COPYOUTFROMFIFO ),
270             );
271              
272 7         66 return %state; #heredoc
273              
274             }
275              
276             } # end if-egress
277              
278             }
279             ### INTERFACE SUBROUTINE ###
280             # Usage : hd_labels() or hd_labels( %newlabels )
281             # Purpose : Subroutine to get/set state labels.
282             # default labels are 'S', 'I', 'H' and 'E'.
283             # (i.e Source, Ingress, Heredoc, or Egress)
284             # Returns : Hash with the definition of labels for each state
285             # Throws : No
286              
287             sub hd_labels {
288 2184     2184 1 9035 my %arg = @_;
289 2184         2257 my %marker;
290              
291 2184 100       7229 $arg{source} = q{S} unless exists $arg{source};
292 2184 100       5885 $arg{ingress} = q{I} unless exists $arg{ingress};
293 2184 100       5525 $arg{heredoc} = q{H} unless exists $arg{heredoc};
294 2184 100       5619 $arg{egress} = q{E} unless exists $arg{egress};
295              
296 2184         2525 state $source = $arg{source};
297 2184         2387 state $ingress = $arg{ingress};
298 2184         2681 state $heredoc = $arg{heredoc};
299 2184         4064 state $egress = $arg{egress};
300              
301 2184         16927 return %marker = (
302             source => $source,
303             ingress => $ingress,
304             heredoc => $heredoc,
305             egress => $egress,
306             );
307             }
308              
309             ### INTERFACE SUBROUTINE ###
310             # Usage : hd_init()
311             # Purpose : Empties the terminator and tab arrays and set the internal
312             # state to source. Used after each file processed in case of
313             # the ingress/egress conditions are not found properly.
314             # Default labels are 'S', 'I', 'H' and 'E'.
315             # (i.e Source, Ingress, Heredoc, or Egress)
316             # Returns : $EMPTY_STR
317             # Throws : No
318              
319             sub hd_init {
320 1     1 1 820 my %marker = hd_labels(); # get default markers
321 1         2 my $initstate = $marker{source}; # default initial state
322 1         2 my $EMPTY_STR = q{};
323              
324             # Set the state to source
325 1         2 _state($initstate);
326              
327             # Empty the terminator array
328             FIFOLOOP:
329 1         3 while ( _infifo() ) {
330 1         3 next FIFOLOOP;
331             }
332              
333             # empty the tab array
334             TABLOOP:
335 1         2 while ( _infifotab() ) {
336 1         28 next TABLOOP;
337             }
338              
339 1         3 return $EMPTY_STR;
340             }
341              
342              
343             ### The Module private subroutines starts here ###
344              
345             ### INTERNAL UTILITY ###
346             # Usage : _is_comment( $line )
347             # Purpose : Prevent a false ingress condition if line is a comment.
348             # Returns : True (1) or False ($EMPTY_STR)
349             # Throws : No
350              
351             sub _is_comment {
352 414     414   537 my $EMPTY_STR = q{};
353 414         455 my $line;
354              
355 414 50       955 if ( !defined( $line = shift ) ) {
356 0         0 return $EMPTY_STR;
357             }
358              
359             # If only white space left of the '#' its a comment.
360 414         772 $line =~ tr/ \t\n\r\f//d;
361              
362             # Test first character for '#', i.e. index() return 0.
363 414 100       1524 if ( index( $line, '#' ) == 0 ) {
364 86         404 return 1;
365             }
366              
367 328         1009 return $EMPTY_STR; # It's not a comment
368             }
369              
370             ### INTERNAL UTILITY ###
371             # Usage : _is_ingress( $line )
372             # Purpose : Determine if line is an ingress line (regex /<
373             # Returns : True (1) or False ($EMPTY_STR)
374             # Throws : No
375              
376             sub _is_ingress {
377 355     355   989 my $line = shift;
378 355         427 my $EMPTY_STR = q{};
379              
380 355 100       1326 if ( !_is_comment($line) ) {
381              
382 273 100       1010 if ( $line =~ m/<
383              
384             ## Prevent false positives (Filter::Heredoc::Rule) ##
385 52 100       293 if ( !_hd_is_rules_ok_line($line) ) {
386 4         13 return $EMPTY_STR; # FALSE, not an ingress line
387             }
388              
389 48         155 return 1; # TRUE
390             }
391             }
392 303         852 return $EMPTY_STR; # FALSE
393             }
394              
395             ### INTERNAL UTILITY ###
396             # Usage : _is_egress( $line )
397             # Purpose : Determine if line is an egress line
398             # Returns : True (1) or False ($EMPTY_STR)
399             # Throws : No
400              
401             sub _is_egress {
402 155     155   226 my $line = shift;
403 155         227 my $EMPTY_STR = q{};
404 155         193 my $nextoutdelimiter = $EMPTY_STR;
405 155         191 my $COPYOUTFROMFIFO = 1;
406              
407             =for EgressNotes:
408             To be a valid delimter, first word in line must match next infifo terminator.
409             split() defaults to split on ' ' and on $_ (and this is not same as //!)
410             Currently no rule helper is used on the egress delimiter.
411             Removes all trailing white space (and if no word, all is removed)
412              
413             =cut
414              
415 155         176 $_ = $line;
416 155         491 my @linefield = split;
417              
418             # Check what is waiting (do not remove) from fifo of delimiters
419 155         400 $nextoutdelimiter = _infifo( $EMPTY_STR, $COPYOUTFROMFIFO );
420              
421             # Stop processing, no delimiters in fifo
422 155 50       494 if ( $nextoutdelimiter eq $EMPTY_STR ) {
423 0         0 return $EMPTY_STR;
424             }
425              
426             # Line is undef for lines with white space
427 155 100       568 if ( !defined( $linefield[0] ) ) {
    100          
428 17         51 return $EMPTY_STR; # FALSE
429             }
430             elsif ( $nextoutdelimiter eq $linefield[0] ) {
431 54         283 return 1; # TRUE
432             }
433              
434 84         573 return $EMPTY_STR; # FALSE
435             }
436              
437             ### INTERNAL UTILITY ###
438             # Usage : _get_posix_delimiter( $line )
439             # Purpose : Extracts the delimiter and assumes POSIX i.e. white
440             # space is not significant between '<<' and 'delimiter'.
441             # Returns : The delimiter itself (includes '-' if << -EOT).
442             # Throws : No
443              
444             sub _get_posix_delimiter {
445 55     55   117 my $tmpdelim = shift;
446 55         81 my $EMPTY_STR = q{};
447 55         77 my $subdelimiter = $EMPTY_STR;
448              
449             # Remove all quote characters and get the delimiter itself
450 55         384 $tmpdelim =~ s/\s+//g; # removes all white space (becomes one word)
451 55         163 $tmpdelim = _strip_quotes($tmpdelim); # removes any [ " ' \ ]
452 55         211 $tmpdelim =~ m/<{2}(.*)/;
453 55         137 $subdelimiter = $1;
454              
455 55         130 return $subdelimiter;
456             }
457              
458             ### INTERNAL UTILITY ###
459             # Usage : _state() or _state( q{E} )
460             # Purpose : Subroutine to get/set the persistent state.
461             # Returns : The state (label) of the state machine when called.
462             # Throws : No
463              
464             sub _state {
465 1622     1622   2652 my %marker = hd_labels();
466 1622         2568 state $linestate = $marker{source}; # default initial state
467 1622         1913 my $newstate = shift;
468              
469             # Set or get the new state
470 1622 100       3576 $linestate = $newstate if defined $newstate;
471              
472 1622         5018 return $linestate;
473             }
474              
475             ### INTERNAL UTILITY ###
476             # Usage : _strip_quotes( $line )
477             # Purpose : Before a delimiter is ready to be saved, quotes shall
478             # first be removed.
479             # Returns : String without any quotes or escapes character i.e. [" ' \ ].
480             # Throws : No
481              
482             sub _strip_quotes {
483 55     55   98 my $tmpstr = shift;
484 55         119 my $noquotesstr;
485              
486 55         92 $tmpstr =~ tr/\\//d; # remove all [\];
487 55         109 $tmpstr =~ tr/"//d; # remove all ["];
488 55         83 $tmpstr =~ tr/'//d; # remove all ['];
489              
490 55         82 $noquotesstr = $tmpstr;
491              
492 55         128 return $noquotesstr;
493             }
494              
495             ### INTERNAL UTILITY ###
496             # Usage : _strip_tabdelimiter( $line )
497             # Purpose : Removes the tab-delimiter '-' after '<<' if present.
498             # Returns : String without '-' or the original string not present.
499             # Throws : No
500              
501             sub _strip_tabdelimiter {
502 55     55   96 my $line = shift;
503              
504             # Get the string after '-'
505 55 100       188 if ( $line =~ m/^-(.*)/ ) {
506 7         19 return $1;
507             }
508              
509 48         105 return $line; # ..otherwise return the original string
510             }
511              
512             ### INTERNAL UTILITY ###
513             # Usage : _infifo( $line ), _infifo(), _infifo( $EMPTY_STR, 1 )
514             # Purpose : Accessor routine to insert/extract delimiter from fifo array.
515             # When extracting, the delimiter is fully removed from array.
516             # The last syntax looks for next delimiter without removing it.
517             # Returns : Returns the delimiter or an $EMPTY_STR when no delimiters exists.
518             # Throws : No
519              
520             sub _infifo {
521 829     829   1069 my $EMPTY_STR = q{};
522 829         1034 my $delimiter = shift;
523 829   66     1875 my $copyoutfromfifo = shift || $EMPTY_STR; # default FALSE
524 829         9036 my $nextelementout;
525              
526             # Holds the egress terminator(s) at any given time
527 829         1140 state @terminators;
528              
529             # Test that its not the pre-view mode
530 829 100       1923 if ( !$copyoutfromfifo ) {
531              
532             # Insert the new delimiter in the fifo array
533 111 100       207 if ( defined $delimiter ) {
534 55         96 push @terminators, $delimiter;
535 55         221 return;
536             }
537             else {
538              
539             # Shift out next delimiter
540 56 100       158 if ( defined( my $tmp = shift @terminators ) ) {
541 55         107 return $tmp;
542             }
543             else {
544 1         4 return $EMPTY_STR; # fifo array is empty
545             }
546             }
547             }
548              
549             # Neither insert or extract - pre-view next array element in the array
550             else {
551              
552             # Third mode of syntax, '$copyoutfromfifo' is not-false from above
553 718 50       1946 if ( $delimiter eq $EMPTY_STR ) {
554              
555             # Get one delimiter from the terminator fifo array
556 718 100       1660 if ( defined( $nextelementout = shift @terminators ) ) {
557              
558             # Preserve the fifo array insert the delimiter again
559 365         736 unshift @terminators, $nextelementout;
560 365         1333 return $nextelementout;
561             }
562             else {
563 353         1823 return $EMPTY_STR;
564             }
565             }
566             }
567              
568             }
569              
570             ### INTERNAL UTILITY ###
571             # Usage : _infifotab( $flag ), _infifotab(), _infifotab( $EMPTY_STR, 1 )
572             # Purpose : Accessor routine to insert/extract true/false from tabfifo array.
573             # When extracting, the value is fully removed from array.
574             # The last syntax looks for next flag value without removing it.
575             # Returns : Returns 1 (true) or an $EMPTY_STR when no flags exists.
576             # Throws : No
577              
578             sub _infifotab {
579 519     519   658 my $EMPTY_STR = q{};
580 519         569 my $istabremoveflag = shift; # this is either $EMPTY_STR, or '1' i.e true
581 519   66     1462 my $copyoutfromfifo = shift || $EMPTY_STR; # default FALSE
582 519         502 my $nextelementout;
583              
584             # Holds tab-removal flags at any given time
585 519         757 state @tabremovals;
586              
587             # Test that its not the pre-view mode
588 519 100       1017 if ( !$copyoutfromfifo ) {
589              
590             # Add the new flag value to fifo
591 111 100       261 if ( defined $istabremoveflag ) {
592 55         95 push @tabremovals, $istabremoveflag;
593 55         118 return;
594             }
595             else {
596              
597             # Shift out next flag value
598 56 100       160 if ( defined( my $tmp = shift @tabremovals ) ) {
599 55         303 return $tmp;
600             }
601             else {
602 1         4 return $EMPTY_STR; # fifo array is empty
603             }
604             }
605             }
606              
607             # Neither insert or extract - pre-view next array element in the array
608             else {
609              
610             # Third mode of syntax, '$copyoutfromfifo' is not-false from above
611 408 50       1154 if ( $istabremoveflag eq $EMPTY_STR ) {
612              
613             # Get one tab delimiter from the tabremoval fifo array
614 408 100       931 if ( defined( $nextelementout = shift @tabremovals ) ) {
615              
616             # Preserve the fifo array insert the flag again
617 149         321 unshift @tabremovals, $nextelementout;
618 149         637 return $nextelementout;
619             }
620             else {
621 259         934 return $EMPTY_STR;
622             }
623             }
624             }
625              
626             }
627              
628             ### INTERNAL UTILITY ###
629             # Usage : _strip_trailing_pipe( $line )
630             # Purpose : Ingress line characters after a pipe (and an optional shell
631             # command) must be removed to allow extracting the delimiter.
632             # Returns : The line, with everything after the pipe removed incl the pipe
633             # or the line untouched if there is no pipe.
634             # Throws : No
635              
636             sub _strip_trailing_pipe {
637 55     55   89 my $EMPTY_STR = q{};
638 55         93 my $line = shift;
639 55         79 my $newline = $EMPTY_STR;
640              
641 55 50       312 if ( !defined($line) ) {
642 0         0 return $EMPTY_STR;
643             }
644              
645 55         419 my $regexpipe = qr/\|/;
646 55         359 my $regexcapture = qr/^(.*)\|/;
647              
648             # If no pipe return original line
649 55 100       347 if ( $line !~ $regexpipe ) {
650 47         189 return $line;
651             }
652              
653             # Capture everything up to the pipe symbol
654 8 50       58 if ( $line =~ $regexcapture ) {
655 8         22 $newline = $1;
656 8         36 return $newline;
657             }
658              
659 0           return $line; # If match fails returns the original string
660             }
661              
662              
663             =head1 SYNOPSIS
664              
665             use 5.010;
666             use Filter::Heredoc qw( hd_getstate hd_init hd_labels );
667             use Filter::Heredoc::Rule qw( hd_syntax );
668            
669             my $line;
670             my %state;
671            
672             # Get the defined labels to compare with the returned state
673             my %label = hd_labels();
674              
675             # Read a file line-by-line and print only the here document
676             while (defined( $line = )) {
677             %state = hd_getstate( $line );
678             print $line if ( $state{statemarker} eq $label{heredoc} );
679             if ( eof ) {
680             close( ARGV );
681             hd_init(); # Prevent state errors to propagate to next file
682             }
683             }
684              
685             # Test a line (is this an opening delimiter line?)
686             $line = q{cat <
687             %state = hd_getstate( $line );
688             print "$line\n" if ( $state{statemarker} eq $label{ingress} );
689            
690             # Load a syntax helper rule (shell script is built in)
691             hd_syntax ( 'pod' );
692              
693             =head1 DESCRIPTION
694              
695             This is the core module for I. If you're not looking
696             to extend or alter the behavior of this module, you probably want to
697             look at L instead.
698              
699             I provides subroutines to search and print here
700             documents. Here documents (also called "here docs") allow a type of
701             input redirection from some following text. This is often used to embed
702             short text messages (or configuration files) within shell scripts.
703              
704             This module extracts here documents from POSIX IEEE Std 1003.1-2008
705             compliant shell scripts. Perl have derived a similar syntax but is at
706             the same time different in many details.
707              
708             Rules can be added to enhance here document extraction, i.e. prevent
709             "false positives". L exports an additional
710             subroutine to load and unload rules.
711              
712             This version supports a basic C rule. Current subroutines can be
713             tested on Perl scripts if the code constructs use a near POSIX form
714             of here documents. With that said don't rely on the current version
715             for Perl since it's still in a very early phase of development.
716              
717             =head2 Concept to parse here documents.
718              
719             This is a line-by-line state machine design. Reading from the beginning
720             to the end of a script results in following state changes:
721              
722             Source --> Here document --> Source
723            
724             What tells a source line from a here document line apart? Nothing!
725             However if adding an opening and closing delimiter state I tracking
726             previous state we can identify what is source and what's a here document:
727              
728             Source --> Ingress --> Here document --> Egress --> Source
729              
730             In reality there are few more state changes defined by POSIX.
731             An example of this is the script below and with added state labels:
732              
733             S] #!/bin/bash --posix
734             I] cat <
735             H] Hi,
736             E] eof1
737             H] Helene.
738             E] eof2
739             S]
740              
741             Naturally, when bash runs this only the here document is printed:
742              
743             Hi,
744             Helene.
745              
746             =head1 SUBROUTINES
747              
748             I exports following subroutines only on request.
749              
750             hd_getstate # returns a label based on the argument (text line)
751             hd_labels # reads out and (optionally) define new labels
752             hd_init # flushes the internal state machine
753            
754             L exports one subroutine to load and unload
755             syntax rules.
756              
757             hd_syntax # load/unload a script syntax rule
758              
759             =head2 B
760              
761             This routine determines the new state, based on last state C the
762             new text line in the argument.
763              
764             %state = hd_getstate( $line );
765            
766             Returns a hash with following keys/values:
767              
768             statemarker : Holds a label that represent the state of the line.
769            
770             blockdelimiter: Holds the delimiter which belongs to a 'region'.
771            
772             is_tabremovalflag: If the redirector had a trailing minus this
773             value is true for the actual line.
774              
775             A here document 'region' is defined as all here document lines being
776             bracketed by the ingress (opening delimiter) and the egress (terminating
777             delimiter) line. This region may or may not have a file unique delimiter.
778              
779             To prevent unreliable results, only pass a text line as an argument.
780             Use file test operators if reading input lines from a file:
781              
782             if ( -T $file ) {
783             print "$file 'looks' like a plain text file to me.\n";
784             }
785              
786             This function throws exceptions on a few fatal internal errors.
787             These are trappable. See ERRORS below for messages printed.
788              
789             =head2 B
790              
791             Gets or optionally sets a new unique label for the four possible states.
792              
793             %label = hd_labels();
794             %label = hd_labels( %newlabel );
795              
796             The hash keys defines the default internal label assignments.
797              
798             %label = (
799             source => 'S',
800             ingress => 'I',
801             heredoc => 'H',
802             egress => 'E',
803             );
804            
805             Returns a hash with the current label assignment.
806              
807             =head2 B
808              
809             Sets the internal state machine to 'source' and empties all internal
810             state arrays.
811              
812             hd_init();
813              
814             When reading more that one file, call this function before next file to
815             prevent any state faults to propagate to next files input. Now
816             always returns an $EMPTY_STR (q{}) but this may change to indicate an
817             state error from previous files.
818              
819              
820             =head1 ERRORS
821              
822             C throws following exceptions.
823              
824             =over 4
825              
826             =item * B
827              
828             If the text line argument is C following message, including a
829             full trace back, is printed.
830              
831             Passed argument to function is undef.
832             Can't determine state from an undef argument.
833            
834             Ensure that only a plain text line is supplied as an argument.
835              
836             =item * B
837              
838             If the state machine conclude a change was from Ingress to Ingress
839             following message, including a full trace back, is printed:
840              
841             Current state is Ingress, and passed line say we shall change
842             to Ingress again. Not allowed change i.e. Ingress --> Ingress
843            
844             If this happens, please report this as a BUG and how to reproduce.
845              
846             =item * B
847              
848             If the state machine conclude a change was from Egress to Egress
849             following, including a full trace back, message is printed:
850              
851             Current state is Egress, and passed line say we shall change
852             to Egress again. Not allowed change i.e. Egress --> Egress.
853            
854             If this happens, please report this as a BUG and how to reproduce.
855              
856             =back
857              
858             =head1 DEPENDENCIES
859              
860             I only requires Perl 5.10 (or any later version).
861              
862             =head1 AUTHOR
863              
864             Bertil Kronlund, C<< >>
865              
866             =head1 BUGS AND LIMITATIONS
867              
868             I complies with *nix POSIX shells here document syntax.
869             Non-compliant shells on e.g. MSWin32 platform is not supported.
870              
871             Please report any bugs or feature requests to
872             L or at
873             C<< >>.
874              
875             =head1 SEE ALSO
876              
877             Overview of here documents and its usage:
878             L
879              
880             The IEEE Std 1003.1-2008 standards can be found here:
881             L
882              
883             L, L
884              
885             L discuss e.g. how to embed POD as
886             here documents in shell scripts to carry their own documentation.
887              
888             =head1 LICENSE AND COPYRIGHT
889              
890             Copyright 2011-12, Bertil Kronlund
891              
892             This program is free software; you can redistribute it and/or modify it
893             under the terms of either: the GNU General Public License as published
894             by the Free Software Foundation; or the Artistic License.
895              
896             See http://dev.perl.org/licenses/ for more information.
897              
898             =cut
899              
900             1; # End of Filter::Heredoc