File Coverage

blib/lib/Text/MetaText.pm
Criterion Covered Total %
statement 468 578 80.9
branch 176 256 68.7
condition 47 82 57.3
subroutine 51 64 79.6
pod 3 6 50.0
total 745 986 75.5


line stmt bran cond sub pod time code
1             #============================================================================
2             #
3             # Text::MetaText
4             #
5             # DESCRIPTION
6             # Perl 5 module to process template files, featuring variable
7             # substitution, file inclusion, conditional operations, print
8             # filters and formatting, etc.
9             #
10             # AUTHOR
11             # Andy Wardley
12             #
13             # COPYRIGHT
14             # Copyright (C) 1996-1998 Andy Wardley. All Rights Reserved.
15             #
16             # This module is free software; you can redistribute it and/or
17             # modify it under the terms of the Perl Artistic Licence.
18             #
19             #----------------------------------------------------------------------------
20             #
21             # $Id: MetaText.pm,v 0.22 1998/09/01 11:23:14 abw Exp abw $
22             #
23             #============================================================================
24            
25             package Text::MetaText;
26              
27 9     9   24990 use strict;
  9         22  
  9         426  
28 9     9   12389 use FileHandle;
  9         163744  
  9         58  
29 9     9   22162 use Date::Format;
  9         123306  
  9         1313  
30 9     9   107 use vars qw( $VERSION $FACTORY $ERROR );
  9         20  
  9         588  
31              
32 9     9   14196 use Text::MetaText::Factory;
  9         24  
  9         1081  
33              
34             require 5.004;
35              
36              
37              
38             #========================================================================
39             # ----- CONFIGURATION -----
40             #========================================================================
41            
42             $VERSION = sprintf("%d.%02d", q$Revision: 0.22 $ =~ /(\d+)\.(\d+)/);
43             $FACTORY = 'Text::MetaText::Factory';
44              
45              
46             # debug level constants (debugging will get nicer one day RSN)
47 9     9   57 use constant DBGNONE => 0; # no debugging
  9         16  
  9         730  
48 9     9   57 use constant DBGINFO => 1; # information message only
  9         13  
  9         494  
49 9     9   84 use constant DBGCONF => 2; # configuration details
  9         15  
  9         847  
50 9     9   48 use constant DBGPREP => 4; # show pre-processor operations
  9         16  
  9         349  
51 9     9   40 use constant DBGPROC => 8; # show process operation
  9         17  
  9         330  
52 9     9   45 use constant DBGPOST => 16; # show post-process operation
  9         16  
  9         535  
53 9     9   45 use constant DBGDATA => 32; # show data elements (parameters)
  9         16  
  9         364  
54 9     9   45 use constant DBGCONT => 64; # show content of blocks
  9         21  
  9         377  
55 9     9   62 use constant DBGFUNC => 128; # private method calls
  9         25  
  9         416  
56 9     9   43 use constant DBGEVAL => 256; # show conditional evaluation steps
  9         33  
  9         494  
57 9     9   45 use constant DBGTEST => 512; # test code
  9         15  
  9         355  
58 9     9   43 use constant DBGALL => 1023; # all debug information
  9         20  
  9         118295  
59              
60             my $DBGNAME = {
61             'none' => DBGNONE,
62             'info' => DBGINFO,
63             'config' => DBGCONF,
64             'preproc' => DBGPREP,
65             'process' => DBGPROC,
66             'postproc' => DBGPOST,
67             'data' => DBGDATA,
68             'content' => DBGCONT,
69             'function' => DBGFUNC,
70             'evaluate' => DBGEVAL,
71             'test' => DBGTEST,
72             'all' => DBGALL,
73             };
74              
75              
76              
77             #========================================================================
78             # ----- PUBLIC METHODS -----
79             #========================================================================
80            
81             #========================================================================
82             #
83             # new($cfg)
84             #
85             # Module constructor. Reference to a hash array containing configuration
86             # options may be passed as a parameter. This is passed off to
87             # _configure() for processing.
88             #
89             # Returns a reference to a newly created Text::MetaText object.
90             #
91             #========================================================================
92              
93             sub new {
94 13     13 1 25386 my $class = shift;
95 13         34 my $self = {};
96 13         35 bless $self, $class;
97              
98 13         65 $self->_configure(@_);
99 13         33 return $self;
100             }
101              
102              
103              
104             #========================================================================
105             #
106             # process_file($file, \%tags)
107             #
108             # Public method for processing files. Calls _parse_file($file) to
109             # parse and load the file into the symbol table (indexed by $file)
110             # and then calls $self->_process($file, $tags) to process the symbol
111             # table entry and generate output. The optional $tags parameter may be
112             # used to refer to a hash array of pre-defined variables which should be
113             # used when processing the file.
114             #
115             # Returns the result of $self->_process($file, $tags) which may be undef
116             # to indicate a processing error. May also return undef to indicate a
117             # parse error. On success, a text string is returned which contains the
118             # output of the process stage.
119             #
120             #========================================================================
121              
122             sub process_file {
123 85     85 0 128 my $self = shift;
124 85         152 my $file = shift;
125              
126              
127 85         459 $self->_DEBUG(DBGFUNC, "process_file($file, %s)\n", join(", ", @_));
128              
129             # parse the file into the symbol table if it's not already there
130 85 100       244 unless ($self->_symbol_defined($file)) {
131 30 50       132 return undef unless defined $self->_parse_file($file);
132             }
133              
134             # call _process to do the real processing and implicitly return result
135 85         325 $self->_process($file, @_);
136             }
137              
138              
139              
140             #========================================================================
141             #
142             # process_text($text, \%tags)
143             #
144              
145             # Public method for processing text strings. Calls _parse_text($text) to
146             # parse the string and return a reference to an anonymous array, $block,
147             # which represents the parsed text string, separated by newlines. This
148             # is then passed to $self->_process($block, @_) along with any other
149             # parameters passed in to process_text(), such as $tags which is a
150             # reference to a hash array of pre-defined variables.
151             #
152             # Returns the result of $self->_process($block, $tags) which may be undef
153             # to indicate a processing error. May also return undef to indicate a
154             # parse error. On success, a text string is returned which contains the
155             # output of the process stage.
156             #
157             #========================================================================
158              
159             sub process_text {
160 0     0 0 0 my $self = shift;
161 0         0 my $text = shift;
162 0         0 my $block;
163              
164              
165 0         0 $self->_DEBUG(DBGFUNC, "process_text($text, ", join(", ", @_), ")\n");
166              
167              
168             # parse the text and store the returned block array
169 0 0       0 return undef unless defined($block = $self->_parse_text($text));
170              
171             # call _process to do the real processing and implicitly return result
172 0         0 $self->_process($block, @_);
173             }
174              
175              
176              
177             #========================================================================
178             #
179             # process($file, \%tags)
180             #
181             # Alias for 'process_file(@_)' which is provided for backward
182             # compatibility with older MetaText versions.
183             #
184             #========================================================================
185              
186             sub process {
187 26     26 0 27827 my $self = shift;
188 26         101 $self->process_file(@_);
189             }
190              
191              
192              
193             #========================================================================
194             #
195             # declare($input, $name)
196             #
197             # Public method which allows text blocks and pre-compiled directive
198             # arrays to be installed in the symbol table for subsequent use in
199             # %% INCLUDE %% directives.
200             #
201             # In the simplest case, $input is a text string (i.e. any scalar) which
202             # may contain embedded MetaText directives. This is parsed using the
203             # _parse_text($input, $name) method which creates a parsed directive
204             # list which is subsequently installed in the symbol table, indexed by
205             # $name. Subsequent directives of the form "%% INCLUDE $name %%" will
206             # then correctly resolve the cached contents parsed from the text string.
207             #
208             # $input may also be a reference to an array of text strings and/or
209             # MetaText directive objects. These are instances of the
210             # Text::MetaText::Directive class, or sub-classes thereof. If you know
211             # how to instantiate directive objects directly, then you can store
212             # "pre-compiled" blocks straight into the symbol table using this method.
213             # This can significantly speed up processing times for complex,
214             # dynamically contructed blocks by totally elimiating the parsing stage.
215             #
216             # The MetaText Directive class will shortly be updated (beyond 0.2)
217             # to make this process easier. At that point, the Directive documentation
218             # will updated to better explain this process. In the mean time, don't
219             # worry if you don't understand this - you're probably not one of the
220             # two people who specifically needed this feature :-)
221             #
222             # Returns 1 if the symbol table entry was correctly defined. If a parse
223             # error occurs (when parsing a text string), an error is raised and
224             # undef is returned.
225             #
226             #========================================================================
227              
228             sub declare {
229 2     2 1 20 my $self = shift;
230 2         3 my $input = shift;
231 2         4 my $name = shift;
232 2         3 my $ref;
233              
234             # is $input a reference of some kind?
235 2 100       7 if ($ref = ref($input)) {
236              
237             # $input may be an array ref of text/directives
238 1 50       4 $ref eq 'ARRAY' && do {
239             # get a symbol table entry reference
240 1         11 my $symtabent = $self->_symbol_entry($name);
241              
242             # clear any existing symbol table entry and push new content
243 1 50       3 splice(@$symtabent, 0) if scalar @$symtabent;
244 1         3 push(@$symtabent, @$input);
245              
246             # no problem
247 1         3 return 1;
248             };
249              
250             # $input may (in the future) be other kinds of refs...
251 0         0 $self->_error("Invalid input reference passed to declare()");
252             }
253             else {
254             # $input is not a reference so we assume it is text; we call
255             # _parse_text($input, $name) to handle it but we do *not*
256             # directly propagate the return value which is a direct reference
257             # to the symbol table entry; data encapsulation and all that
258 1 50       5 return $self->_parse_text($input, $name) ? 1 : undef;
259             }
260             }
261              
262              
263              
264             #========================================================================
265             #
266             # error()
267             #
268             # Public method returning contents of internal ERROR string.
269             #
270             #========================================================================
271              
272             sub error {
273 0     0 1 0 my $self = shift;
274              
275 0         0 return $self->{ ERROR };
276             }
277              
278              
279              
280             #========================================================================
281             # ----- PRIVATE METHODS -----
282             #========================================================================
283            
284             #========================================================================
285             #
286             # _configure($cfg)
287             #
288             # Configuration method which examines the elements in the hash array
289             # referenced by $cfg and sets the object's internal state accordingly.
290             # Errors/warnings are reported via $self->_warn();
291             #
292             #========================================================================
293              
294             sub _configure {
295 13     13   26 my $self = shift;
296 13         24 my $cfg = shift;
297              
298              
299             # initialise class data members
300 13         70 $self->{ SYMTABLE } = {};
301 13         41 $self->{ LINES } = [];
302 13         34 $self->{ ERROR } = ''; # error string (not ERRORFN!)
303              
304             # set configuration defaults
305 13         34 $self->{ DEBUGLEVEL } = DBGNONE; # DEBUG mask
306 13         49 $self->{ MAGIC } = [ '%%', '%%' ]; # directive delimiters
307 13         28 $self->{ MAXDEPTH } = 32; # maximum recursion depth
308 13         29 $self->{ LIB } = ""; # library path for INCLUDE
309 13         42 $self->{ ROGUE } = {}; # how to handle rogue directives
310 13         26 $self->{ CASE } = 0; # case sensitivity flag
311 13         42 $self->{ CASEVARS } = {}; # case sensitive variables
312 13         31 $self->{ CHOMP } = 0; # chomp straggling newlines
313 13         43 $self->{ TRIM } = 1; # trim INCLUDE leading/trailing newlines
314 13         30 $self->{ EXECUTE } = 0; # execute SUBST as function?
315 13         30 $self->{ DELIMITER } = ','; # what splits a list?
316             $self->{ FILTER } = { # pre-defined filters
317             'sr' => sub {
318 4   50 4   11 my $m1 = $_[2] || '';
319 4   100     15 my $m2 = $_[3] || '';
320 4         745 $_[1] =~ s/$m1/$m2/g;
321 4         11 $_[1];
322             },
323             'escape' => sub {
324 4   50 4   30 my $cm = $_[2] || '';
325 4         90 $_[1] =~ s/($cm)/\\$1/g;
326 4         15 $_[1];
327             },
328 13         128 };
329              
330             # the config hash array reference, $cfg, may contain a number of
331             # different config options. These are examined case-insensitively
332             # (but converted to UPPER CASE when stored) and, depending on the
333             # option, tested for correctness, manipulated or massaged in some
334             # way; invalid options generate a warning.
335 13 100       55 return unless defined $cfg;
336              
337             # check a hash ref was supplied as $cfg
338 11 50       48 unless (ref($cfg) eq 'HASH') {
339 0         0 $self->_warn(ref($self) . "->new expects a hash array reference\n");
340 0         0 return;
341             };
342              
343 11         41 foreach (keys %$cfg) {
344              
345             # set simple config values (converting keyword to UPPER case)
346 15 100       136 /^(MAXDEPTH|LIB|DELIMITER|CASE|CHOMP|TRIM|EXECUTE)$/i && do {
347 7         27 $self->{ "\U$_" } = $cfg->{ $_ };
348 7         40 next;
349             };
350              
351             # add any user-defined print filters to the pre-defined ones
352 8 50       27 /^FILTER$/i && do {
353 0         0 my $filter;
354 0         0 foreach $filter (keys %{$cfg->{ $_ }}) {
  0         0  
355 0         0 $self->{ "\U$_" }->{ $filter } = $cfg->{ $_ }->{ $filter };
356             }
357 0         0 next;
358             };
359              
360             # debuglevel is defined as a series of non-word delimited words
361             # which index into the $DBGNAME hash ref for values
362 8 50       19 /^DEBUGLEVEL$/i && do {
363 0         0 foreach (split(/\W+/, $cfg->{ $_ })) {
364 0 0       0 $self->_warn("Invalid debug option: $_\n"), next
365             unless defined($DBGNAME->{ $_ });
366              
367             # logically OR in the new debug value
368 0         0 $self->{ DEBUGLEVEL } |= $DBGNAME->{ $_ };
369             }
370 0         0 next;
371             };
372              
373             # ROGUE defines how unrecognised (rogue) directives should
374             # be handled.
375 8 100       24 /^ROGUE$/i && do {
376             # create a hash reference of valid ROGUE options and
377             # print a warning message about invalid options
378 3         14 foreach my $rogue (split(/\W+/, $cfg->{ $_ })) {
379 4 50       16 if ($rogue =~ /^warn|delete$/i) {
380 4         17 $self->{ ROGUE }->{ uc $rogue } = 1;
381             }
382             else {
383 0         0 $self->_warn("Invalid rogue option: \L$_\n");
384             }
385             }
386 3         6 next;
387             };
388              
389             # CASEVARS are those variables which don't get folded to lower
390             # case when case sensitivity is turned off. This is useful for
391             # metapage which likes to define some "system" variables in
392             # UPPER CASE such as FILETIME, FILENAME, etc.
393 5 100       32 /^CASEVARS$/i && do {
394 1 50       6 if (ref($cfg->{ $_ }) eq 'ARRAY') {
395 1         2 foreach my $var (@{ $cfg->{ $_ } }) {
  1         4  
396 2         7 $self->{ CASEVARS }->{ $var } = 1;
397             }
398             }
399             else {
400 0         0 $self->_warn("CASEVARS option expects an array ref\n");
401             }
402 1         3 next;
403             };
404              
405             # MAGIC needs a little processing to convert to a 2 element
406             # ARRAY ref if a single string was specified (i.e. for both)
407 4 50       12 /^MAGIC$/i && do {
408 0 0       0 if (ref($cfg->{ $_ }) eq 'ARRAY') {
409 0         0 $self->{ MAGIC } = $cfg->{ $_ };
410             }
411             else {
412             # create a 2-element array reference
413 0         0 $self->{ MAGIC } = [ ($cfg->{ $_ }) x 2 ];
414             }
415 0         0 next;
416             };
417              
418             # set ERROR/DEBUG handling function, checking for a CODE reference
419             # NOTE: error function is stored internally as 'ERRORFN' and not as
420             # 'ERROR' which is the object error status (backwards compatability).
421 4 50       19 /^(ERROR|DEBUG)(FN)?$/i && do {
422             # check this is a code reference
423 4 50       12 $self->_warn("Invalid \L$_\E function\n"), next
424             unless ref($cfg->{ $_ }) eq 'CODE';
425 4         21 $self->{ uc $1 . "FN" } = $cfg->{ $_ };
426 4         8 next;
427             };
428              
429             # FACTORY must contain a reference to a $FACTORY class or
430             # derivation of same
431 0 0       0 /^FACTORY$/i && do {
432 0 0       0 $self->_warn("Invalid factory object"), next
433             unless UNIVERSAL::isa($cfg->{ $_ }, $FACTORY);
434 0         0 $self->{ FACTORY } = $cfg->{ $_ };
435 0         0 next;
436             };
437              
438             # warn about unrecognised parameter
439 0         0 $self->_warn("Invalid configuration parameter: $_\n");
440             }
441              
442              
443              
444             # DEBUG code
445 11 50       58 if ($self->{ DEBUGLEVEL } & DBGCONF) {
446 0         0 $self->_DEBUG(DBGCONF, "$self Version $VERSION\n");
447              
448 0         0 foreach (keys %$self) {
449 0         0 $self->_DEBUG(DBGDATA, " %-10s => %s\n", $_, $self->{ $_ });
450             }
451             }
452             }
453              
454              
455              
456              
457             #========================================================================
458             #
459             # _parse_file($file)
460             #
461             # Attempts to locate a file with the filename as specified in $file.
462             # If the filename starts with a '/' or '.', it is assumed to be an absolute
463             # file path or one relative to the current working directory. In these
464             # cases, no attempt to look for it outside of its specified location is made.
465             # Otherwise, the directories specified in the LIB entry in the config hash
466             # array are searched followed by the current working directory. If the file
467             # is found, a number of member data items are initialised, the file is
468             # opened and then _parse($file) is called to parse the file.
469             #
470             # Returns the result from _parse($file) or undef on failure.
471             #
472             #========================================================================
473              
474             sub _parse_file {
475 30     30   58 my $self = shift;
476 30         58 my $file = shift;
477 30         47 my ($dir, $filepath);
478              
479              
480 30         330 $self->_DEBUG(DBGFUNC, "_parse_file($file)\n");
481              
482              
483             # default $filepath to $file (may be an absolute path)
484 30         55 $filepath = $file;
485              
486             # file is relative to $self->{ LIB } unless it starts '/' or '.'
487 30 50 33     294 if (defined($self->{ LIB }) && $filepath !~ /^[\/\.]/) {
488              
489 30         148 foreach $dir (split(/[|;:,]/, $self->{ LIB }), '.') {
490             # construct a full file path
491 39         70 $filepath = $dir;
492 39 50       139 $filepath .= '/' unless ($filepath =~ /\/$/);
493 39         83 $filepath .= $file;
494              
495             # test if the file exists
496 39 100       852 last if -f $filepath;
497             }
498             }
499              
500             # open file (may still fail if above loop dropped out the bottom)
501 30 50       292 unless (defined($self->{ FILE } = new FileHandle $filepath)) {
502 0         0 $self->_error("$filepath: $!");
503 0         0 return undef;
504             }
505              
506 30         2925 $self->_DEBUG(DBGINFO, "loading file: $filepath\n");
507              
508             # initialise file stats
509 30         70 $self->{ LINENO } = 0; # no of lines read from _get_line();
510 30         56 $self->{ PUTBACK } = 0; # no of lines put back via _unget_line();
511 30         62 $self->{ FILENAME } = $file;
512 30         78 $self->{ FILEPATH } = $filepath;
513 30         100 $self->{ INPUT } = "$file"; # used for error reporting
514              
515             # call _parse($file) and implicitly return result
516 30         103 $self->_parse($file);
517             }
518              
519              
520              
521             #========================================================================
522             #
523             # _parse_text($text, $symbol)
524             #
525             # Initialises the text member data so that _get_line() can read from it
526             # and then calls _parse() to parse the text contents. If $symbol is
527             # defined it is used as the symbol name which is then stored in the
528             # symbol table. If $symbol is undefined, the block remains anonymous.
529             #
530             # Returns the result from _parse().
531             #
532             #========================================================================
533              
534             sub _parse_text {
535 1     1   1 my $self = shift;
536 1         2 my $text = shift;
537 1         2 my $symbol = shift; # may be undef
538              
539              
540 1 50       9 $self->_DEBUG(DBGFUNC, "_parse_text($text, ",
541             defined $symbol ? $symbol : "", ")\n");
542              
543              
544             # set text string and initialise stats
545 1         3 $self->{ LINENO } = 0; # no of lines read from _get_line();
546 1         4 $self->{ PUTBACK } = 0; # no of lines put back via _unget_line();
547 1         3 $self->{ TEXT } = $text;
548 1         2 $self->{ INPUT } = "text string"; # used for error reporting
549              
550             # call _parse() and implicitly return result
551 1         4 $self->_parse($symbol);
552             }
553              
554              
555              
556             #========================================================================
557             #
558             # _parse($symbol)
559             #
560             # The _parse() method reads the current input stream which may originate
561             # from a file (_parse_file($file)) or a text string (_parse_text($text)).
562             # The contents are split into chunks of plain text or MetaText directives
563             # (enclosed by the MAGIC tokens). Text chunks are pushed directly onto
564             # an output list, while directives are parsed and blessed into a directive
565             # class before being pushed out. A reference to the output list is
566             # returned. If a symbol name is passed as the first parameter to parse(),
567             # then a corresponding entry in the $self->{ SYMTABLE } hash is created
568             # to reference this list.
569              
570             # Processing continues until EOF is reached or an %% END(BLOCK|IF)? %%
571             # directive is encountered.
572             #
573             # Blocks encountered that are bounded by a matched pair of %% BLOCK name %%
574             # ... %% ENDBLOCK %% directives will cause a recursive call to
575             # $self->_parse($blockname) to be made to handle the block definition for
576             # the sub-block. Block definitions can theoretically be nested indefinately
577             # although in practice, the process ends when an upper recursion limit is
578             # reached ($self->{ MAXDEPTH }). To this effect, $depth is used to
579             # internally indicate the current recursion depth to each instance.
580             #
581             #========================================================================
582              
583             sub _parse {
584 50     50   82 my $self = shift;
585 50         85 my $symbol = shift; # may be undef - i.e. anonymous symbol
586 50   100     184 my $depth = shift || 1;
587 50         84 my ($magic1, $magic2);
588 0         0 my ($line, $nextline);
589 0         0 my ($symtabent, $factory, $directive);
590              
591              
592 50 50       178 $self->_DEBUG(DBGFUNC, "_parse(%s)\n", defined $symbol ? $symbol : "");
593              
594              
595             # check for excessive recursion
596 50 50       173 if ($depth > $self->{ MAXDEPTH }) {
597 0         0 $self->_error("Maximum recursion exceeded in _parse()");
598 0         0 return undef;
599             }
600              
601             # get a local copy of the MAGIC symbols for efficiency
602 50         61 ($magic1, $magic2) = @{ $self->{ MAGIC } };
  50         143  
603              
604             # get a symbol table entry reference (an undefined $symbol causes
605             # an anonymous array ref to be returned).
606 50         152 $symtabent = $self->_symbol_entry($symbol);
607              
608             # clear any existing symbol table entry; this doesn't affect caching,
609             # BTW because _parse() only gets called when reload is necessary
610 50 100       706 splice(@$symtabent, 0) if scalar @$symtabent;
611              
612             # get a reference to the factory object used to create directives
613 50 50       185 return undef unless $factory = $self->_factory();
614              
615              
616             #
617             # main parsing loop begineth here
618             #
619              
620 50         151 READLINE: while (defined($line = $self->_get_line())) {
621              
622             # look to see if there is a directive in the line
623 772         6288 while ($line =~ /
624             (.*?) # anything preceeding a directive
625             $magic1 # opening directive marker
626             \s* # whitespace
627             (.*?) # directive contents
628             \s* # whitespace
629             (
630             ($magic2) # closing directive marker
631             (.*) # rest of the line
632             )? # directive may not be terminated
633             $ # EOL so it all gets eaten
634             /sx) {
635              
636            
637             #
638             # if the directive terminating symbol ($magic2) wasn't
639             # found in the line then it suggests that the directive
640             # continues onto the next line, so we append the next
641             # line and try again.
642             #
643 330 100       1571 unless ($4) {
644             # if we can't read another line, tack on the
645             # magic token to avoid a dangling directive
646 101 50       225 unless (defined($nextline = $self->_get_line())) {
647 0         0 $nextline = $magic2;
648 0         0 $self->_warn("Closing directive tag missing\n");
649             }
650 101         212 chomp($line);
651             # add a space and the next line
652 101         189 $line .= " $nextline";
653 101         4471 next;
654             }
655              
656             #
657             # at this point, we have a line that has a complete directive
658             # ($2) enclosed within it, perhaps with leading ($1) and
659             # trailing ($5) text
660             #
661              
662             # push any preceding text into the output list
663 229 100       707 push(@$symtabent, $1) if length $1;
664              
665             # anything coming after the directive gets re-queued.
666             # CHOMP can be set to remove straggling newlines
667             $self->_unget_line($5)
668 229 100 100     1534 unless $self->{ CHOMP } && $5 eq "\n";
669 229         332 $line = "";
670              
671 229 50       582 if (defined $2) {
672              
673             # get the create a new Text::MetaText::Directive object
674 229         715 $directive = $factory->create_directive($2);
675              
676             # check everything worked OK. eval? bletch!
677 229 50       781 unless (defined $directive) {
678 0         0 $self->_parse_error($factory->error());
679 0         0 return undef;
680             }
681              
682 229         353 my $tt = "Directive created:\n";
683 229         744 foreach (keys %$directive) {
684 1027         4084 $tt .= sprintf(" %-16s => %s\n",
685             $_, $directive->{ $_ });
686             }
687 229         1390 $tt .= " params:\n";
688 229 50       267 foreach (keys %{ $directive->{ PARAMS } || { } }) {
  229         1396  
689             $tt .= sprintf(" %-16s => %s\n",
690 118         426 $_, $directive->{ PARAMS }->{ $_ });
691             }
692 229         616 $self->_DEBUG(DBGTEST, $tt);
693              
694             #
695             # some specialist processing required depending on
696             # $directive->{ TYPE }
697             #
698              
699             # END(BLOCK|IF)? marks the end of a defined block
700 229 100       675 $directive->{ TYPE } =~ /^END(BLOCK|IF)?$/ && do {
701              
702             # save a copy of the tag that ended this block
703             # so that the calling method can check it
704 19         39 $self->{ ENDTAG } = $directive->{ TYPE };
705              
706             # return the symbol table list
707 19         1088 return $symtabent;
708             };
709              
710             # BLOCK directive defines a sub-block
711 210 100       489 $directive->{ TYPE } eq 'BLOCK' && do {
712              
713             # clear ENDTAG data
714 19         43 $self->{ ENDTAG } = "";
715              
716             # we recursively call $self->_parse() to parse the
717             # block and return a reference to the symbol table
718             # entry;
719             my $block = $self->_parse(
720 19         89 $directive->{ IDENTIFIER }, $depth + 1);
721              
722             # check comething was returned
723 19 50       61 return undef unless defined $block;
724              
725             # test that the directive that terminated the block
726             # was END(BLOCK)?
727 19 50       93 unless ($self->{ ENDTAG } =~ /^END(BLOCK)?$/) {
728 0         0 $self->_parse_error("ENDBLOCK expected");
729 0         0 return undef;
730             }
731              
732             # if the 'TRIM' option is defined, we should remove
733             # any leading newline and the final newline from the
734             # last line.
735 19 100       97 if (defined $directive->{ TRIM }
    100          
736             ? $directive->{ TRIM }
737             : $self->{ TRIM }) {
738 15 100       59 shift @$block
739             if $block->[0] eq "\n";
740 15         26 chomp($block->[ $#{ $block } ]);
  15         47  
741             }
742              
743             # if the 'PRINT' option was defined, we convert the
744             # BLOCK directive to an INCLUDE and push it onto the
745             # symbol table so that it gets processed and a copy
746             # of the BLOCK gets pushed to the output
747 19 50       57 if (defined($directive->{ PRINT })) {
748 0         0 $directive->{ TYPE } = 'INCLUDE';
749 0         0 push(@$symtabent, $directive);
750             }
751              
752             # loop to avoid directive getting (re-)pushed below
753 19         163 next;
754             };
755              
756             # push the directive onto the symbol table list
757 191         1279 push(@$symtabent, $directive);
758              
759             } # if (defined($2))
760              
761             } # while ($line =~ ...
762              
763             # anything remaining in $line must be plain text
764 753 100       2855 push(@$symtabent, $line) if length($line);
765              
766             } # READLINE: while...
767              
768             # return a reference to the 'compiled' symbol table entry
769 31         3685 $symtabent;
770             }
771              
772              
773              
774             #========================================================================
775             #
776             # _process($symbol, \%tags, $depth)
777             #
778             # $symbol is a scalar holding the name of a known symbol or a reference
779             # to an array which contains the nodes for an anonymous symbol. In the
780             # former case, the symbol is referenced from the symbol table by calling
781             # $self->_symbol_entry($symbol). In the latter case, the method simply
782             # iterates through the elements of the $symbol array reference.
783             #
784             # Each element in the symbol table entry array is expected to be a simple
785             # scalar containing plain text or a MetaText directive - an instance of
786             # the Text::MetaText::Directive class. Plain text is pushed straight
787             # through to an output queue. Directves are processed according to
788             # their type (e.g. INCLUDE, DEFINE, SUBST, etc) and the resulting output
789             # is pushed onto the output queue.
790             #
791             # The method returns a concatenation of the output list or undef on
792             # error.
793             #
794             #========================================================================
795              
796             sub _process {
797 85     85   274 my $self = shift;
798 85         114 my $symbol = shift;
799 85   50     228 my $tags = shift || {};
800 85   100     256 my $depth = shift || 1;
801 85         122 my ($symtabent, $factory, $directive, $item, $type, $space);
802 0         0 my ($ident);
803 0         0 my $proctext;
804              
805 85         143 my @output = ();
806              
807              
808 85         391 $self->_DEBUG(DBGFUNC, "_process($symbol, $tags, $depth)\n");
809              
810              
811             # check for excessive recursion
812 85 50       238 if ($depth > $self->{ MAXDEPTH }) {
813 0         0 $self->_error("Maximum recursion exceeded");
814 0         0 return undef;
815             }
816              
817             # $symbol may be a reference to an anonymous block array...
818 85 50       6998 if (ref($symbol) eq 'ARRAY') {
819 0         0 $symtabent = $symbol;
820             }
821             # ...or a named symbol which may or may not have been pre-parsed
822             else {
823             # check the symbol has an entry in the symbol table
824 85 50       211 unless ($self->_symbol_defined($symbol)) {
825 0         0 $self->_error("$symbol: no such block defined");
826 0         0 return undef;
827             }
828 85         221 $symtabent = $self->_symbol_entry($symbol);
829             }
830              
831             # get a reference to the factory object and call directive_type()
832             # to determine the kind of Directive objects it creates
833 85 50       205 return undef unless $factory = $self->_factory();
834 85         305 $directive = $factory->directive_type();
835              
836              
837             #
838             # The symbol table entry is an array reference passed explicitly in
839             # $symbol or retrieved by calling $self->_symbol_entry($symbol);
840             # Each element in the array can be either a plain text string or an
841             # instance of the directive class created by the factory object.
842             # The former represent normal text blocks in the processed file, the
843             # latter represent pre-parsed MetaText directives (see _parse()) that
844             # have been created by the factory object. The factory provides the
845             # directive_type() method for determining the class type of these
846             # objects. A directive will contain some of the following elements,
847             # based on the directive type and other data defined in the directive
848             # block:
849             #
850             # $directive->{ TYPE } # directive type: INCLUDE, DEFINE, etc
851             # $directive->{ IDENTIFIER } # target, i.e. INCLUDE
852             # $directive->{ PARAMS } # hash ref of variables defined
853             # $directive->{ PARAMSTR } # original parameter string
854             # $directive->{ IF } # an "if=..." conditional
855             # $directive->{ UNLESS } # ditto "unless=..."
856             # $directive->{ DELIMITER } # delimiter string (see _evaluate())
857             # $directive->{ FILTER } # print filter name and params
858             # $directive->{ FORMAT } # print format
859             #
860              
861             # process each each line from the block
862 85         174 foreach $item (@$symtabent) {
863              
864             # get rid of the non-directive cases first...
865 876 100       8161 unless (UNIVERSAL::isa($item, $directive)) {
866              
867             # return content if we find the end-of-content marker
868 656 100       2682 return join("", @output)
869             if $item =~ /^__(MT)?END__$/;
870              
871             # not a directive - so just push output and loop
872 650         952 push(@output, $item);
873              
874 650         965 next;
875             }
876              
877              
878             # examine any conditionals (if/unless) if defined
879 220 100       526 if ($item->{ HAS_CONDITION }) {
880              
881             # test any "if=" statement...
882 42 100       108 if (defined $item->{ IF }) {
883             my $result = $self->_evaluate($item->{ IF }, $tags,
884 40   33     197 $item->{ DELIMITER } || $self->{ DELIMITER });
885 40 100 66     209 next unless defined($result) && $result > 0;
886             }
887              
888             # ...and/or any "unless=" statement
889 22 100       61 if (defined $item->{ UNLESS }) {
890             my $result = $self->_evaluate($item->{ UNLESS }, $tags,
891 2   33     19 $item->{ DELIMITER } || $self->{ DELIMITER });
892 2 100 66     15 next if defined($result) && $result != 0;
893             }
894             }
895              
896            
897             # we take a copy of the directive TYPE and IDENTIFIER (operand)
898 199         319 $type = $item->{ TYPE };
899 199         304 $ident = $item->{ IDENTIFIER };
900              
901              
902             #------------------------------------
903             # switch ($type)
904             #
905              
906 199 100       423 $type eq 'DEFINE' && do {
907              
908             # $tags is a hash array ref passed in to _process(). We must
909             # clone it before modification in case we should accidentally
910             # update the caller's hash.
911 37         191 $tags = { %$tags };
912              
913             # merge in parameters defined within the INCLUDE directive
914 37         171 $self->_integrate_params($tags, $item->{ PARAMS });
915            
916 37         76 next;
917             };
918              
919 162 100       365 $type eq 'INCLUDE' && do {
920              
921             # an INCLUDE identifier is allowed to contain variable
922             # references which must be interpolated.
923 59         160 $ident = $self->_interpolate($ident, $tags);
924              
925             # clone the existing tags
926 59         327 my $newtags = { %$tags };
927              
928             # merge in parameters defined within the INCLUDE directive
929 59         196 $self->_integrate_params($newtags, $item->{ PARAMS });
930              
931             # process the INCLUDE'd symbol and check return
932 59         205 $proctext = $self->process_file($ident, $newtags, $depth + 1);
933 59 50       144 return undef unless defined $proctext;
934              
935             # push text onto output list, post-processing it along the way
936             # if $self->{ HAS_POSTPROC } is true (i.e. has filter/format)
937             push(@output,
938             $item->{ HAS_POSTPROC }
939 59 100       181 ? $self->_post_process($item, $proctext)
940             : $proctext);
941              
942 59         151 next;
943             };
944              
945 103 50       244 $type eq 'SUBST' && do {
946              
947             # call _substitute to handle token substitution
948 103         230 $proctext = $self->_substitute($item, $tags);
949              
950 103 100       311 if (defined($proctext)) {
951             $proctext = $self->_post_process($item, $proctext)
952 98 100       269 if $item->{ HAS_POSTPROC };
953             }
954             else {
955             # unrecognised token
956             $self->_warn("Unrecognised token: $item->{ IDENTIFIER }\n")
957 5 100       30 if defined $self->{ ROGUE }->{ WARN };
958              
959             # resolve nothing if 'delete' is defined as a ROGUE option
960             $proctext = $self->{ ROGUE }->{ DELETE }
961             ? ""
962             : $self->{ MAGIC }->[ 0 ] # rebuild directive
963             . " "
964             . $item->{ PARAMSTR }
965             . " "
966 5 100       34 . $self->{ MAGIC }->[ 1 ];
967             }
968              
969 103         157 push(@output, $proctext);
970              
971 103         146 next;
972             };
973              
974             # default: invalid directive; this shouldn't happen
975 0         0 $self->_warn("Unrecognise directive: $type\n")
976              
977             #
978             # switch ($type)
979             #------------------------------------
980             }
981              
982             # join output tokens and return as a single line
983 79         628 join("", @output);
984             }
985              
986              
987              
988             #========================================================================
989             #
990             # _get_line()
991             #
992             # Returns the next pending line of text to be processed from the input
993             # file or text string. If there are no pending lines already in the
994             # queue, it reads a line of text from the file handle, $self->{ FILE }.
995             # If $self->{ FILE } is undefined, it looks at $self->{ TEXT }, splits
996             # the contents into lines and pushes them onto the pending line list.
997             # The next pending line in the list can then be returned.
998             #
999             # Return a string representing the next input line or undef if no further
1000             # lines are available (at EOF for example).
1001             #
1002             #========================================================================
1003              
1004             sub _get_line {
1005 904     904   1466 my $self = shift;
1006              
1007              
1008             $self->_DEBUG(DBGFUNC, "_get_line() (%s #%d)\n",
1009 904         3683 $self->{ INPUT }, $self->{ LINENO } + 1);
1010              
1011              
1012             # if there are no lines pending, we try to add some to the queue
1013 904 100       2469 unless (@{ $self->{ LINES } }) {
  904         3150  
1014              
1015 679 100       1348 if (defined $self->{ FILE }) {
    100          
1016             # read from the file
1017 647         664 push(@{ $self->{ LINES } }, $self->{ FILE }->getline());
  647         19410  
1018              
1019             # close file if done
1020 647 100       41028 $self->{ FILE } = undef if $self->{ FILE }->eof();
1021             }
1022             elsif (defined $self->{ TEXT }) {
1023             # split from the text line
1024 1         2 push(@{ $self->{ LINES } }, split(/^/m, $self->{ TEXT }));
  1         5  
1025 1         2 $self->{ TEXT } = undef;
1026             }
1027              
1028             # no default
1029             }
1030              
1031             # LINENO is incremented to indicate that another line has been read,
1032             # unless PUTBACK indicates that there are requeued lines.
1033 904 100       6943 if ($self->{ PUTBACK }) {
1034 223         1300 $self->{ PUTBACK }--;
1035             }
1036             else {
1037 681         983 $self->{ LINENO }++;
1038             }
1039              
1040             # return the next token (may be undef to indicate end of stream)
1041 904         1004 return shift(@{ $self->{ LINES } });
  904         4438  
1042              
1043             }
1044              
1045              
1046              
1047             #========================================================================
1048             #
1049             # _unget_line($line)
1050             #
1051             # Unshifts the specified line, $line, onto the front of the pending
1052             # lines queue. Does nothing if $line is undefined. Effectively the
1053             # complement of _get_line(). The PUTBACK variable variable is
1054             # incremented. The _get_line() method uses this as an indication that
1055             # the line is re-queued and decrements PUTBACK instead of incrementing
1056             # LINENO as per usual.
1057             #
1058             #========================================================================
1059              
1060             sub _unget_line {
1061 223     223   292 my $self = shift;
1062 223         380 my $line = shift;
1063              
1064              
1065 223 50       443 return unless defined $line;
1066              
1067 223         221 my $safeline;
1068 223         929 ($safeline = $line) =~ s/%/%%/g;
1069             $self->_DEBUG(DBGFUNC, "_unget_line(\"$safeline\") (#%d)\n",
1070 223         808 $self->{ LINENO } - 1);
1071              
1072             # increment PUTBACK to indicate there are re-queued lines
1073 223         369 $self->{ PUTBACK }++;
1074              
1075             # unshift (defined) line onto front of list
1076 223         272 unshift(@{ $self->{ LINES } }, $line);
  223         592  
1077             }
1078              
1079              
1080              
1081             #========================================================================
1082             #
1083             # _factory()
1084             #
1085             # Returns a reference to the factory object stored in $self->{ FACTORY }.
1086             # If this is undefined, an attempt is made to instantiate a factory
1087             # object from the default class, $FACTORY, which is then stored in the
1088             # $self->{ FACTORY } hash entry.
1089             #
1090             # Returns a reference to the factory object. On failure, undef is returned
1091             # and a warning is issued via _warn().
1092             #
1093             #========================================================================
1094              
1095             sub _factory {
1096 135     135   242 my $self = shift;
1097              
1098              
1099             # create a default factory if one doesn't already exist
1100 135 100       361 unless (defined $self->{ FACTORY }) {
1101             # $FACTORY is the default factory package
1102 13 50       124 $self->{ FACTORY } = $FACTORY->new()
1103             or $self->_error(
1104             "Factory construction failed: "
1105             . ""
1106             );
1107             }
1108              
1109             # return factory reference
1110 135         628 $self->{ FACTORY };
1111             }
1112              
1113              
1114              
1115             #========================================================================
1116             #
1117             # _symbol_name($symbol)
1118             #
1119             # Returns the name by which $symbol might be referenced in the symbol
1120             # table. Applies case folding (to lower case) unless CASE sensitivity
1121             # is set.
1122             #
1123             #========================================================================
1124              
1125             sub _symbol_name {
1126 306     306   375 my $self = shift;
1127 306         1436 my $symbol = shift;
1128              
1129              
1130 306         801 $self->_DEBUG(DBGFUNC, "_symbol_name($symbol)\n");
1131              
1132              
1133             # convert symbol to lower case unless CASE sensitivity is set
1134 306 50       869 $symbol = lc $symbol unless $self->{ CASE };
1135              
1136 306         1717 return $symbol;
1137             }
1138              
1139              
1140              
1141             #========================================================================
1142             #
1143             # _symbol_defined($symbol)
1144             #
1145             # Returns 1 if the symbol, $symbol, is defined in the symbol table or
1146             # 0 if not.
1147             #
1148             #========================================================================
1149              
1150             sub _symbol_defined {
1151 170     170   221 my $self = shift;
1152 170         220 my $symbol = shift;
1153              
1154              
1155 170         516 $self->_DEBUG(DBGFUNC, "_symbol_defined($symbol)\n");
1156              
1157              
1158             # call _symbol_name() to apply any name munging
1159 170         381 $symbol = $self->_symbol_name($symbol);
1160              
1161             # return 1 or 0 based on existence of symbol table entry
1162 170 100       719 return exists $self->{ SYMTABLE }->{ $symbol } ? 1 : 0;
1163             }
1164              
1165              
1166              
1167             #========================================================================
1168             #
1169             # _symbol_entry($symbol)
1170             #
1171             # Returns a reference to the symbol table entry for $symbol. If there
1172             # is no corresponding symbol currently loaded in the table, the symbol
1173             # table entry is initiated to an empty array reference, [], and that
1174             # value is returned. This list can then be filled, via the reference,
1175             # to populate the symbol table entry. The symbol name, $symbol, may be
1176             # converted to lower case (via _symbol_name($symbol)) unless case
1177             # sensitivity ($self->{ CASE }) is set.
1178             #
1179             # Returns a reference to the array that represents the symbol table
1180             # entry for the specified entry.
1181             #
1182             #========================================================================
1183              
1184             sub _symbol_entry {
1185 136     136   190 my $self = shift;
1186 136         166 my $symbol = shift;
1187              
1188              
1189 136 50       505 $self->_DEBUG(DBGFUNC, "_symbol_entry(%s)\n",
1190             defined $symbol ? $symbol : "");
1191              
1192              
1193             # an undefined symbol gets an anonymous array
1194 136 50       282 return [] unless defined $symbol;
1195              
1196             # determine the real symbol name accounting for case folding
1197 136         268 $symbol = $self->_symbol_name($symbol);
1198              
1199             # create empty table entry for a new symbol
1200             $self->{ SYMTABLE }->{ $symbol } = []
1201 136 100       523 unless defined $self->{ SYMTABLE }->{ $symbol };
1202              
1203             # return reference to symbol table entry
1204 136         3032 $self->{ SYMTABLE }->{ $symbol };
1205             }
1206              
1207              
1208              
1209             #========================================================================
1210             #
1211             # _variable_name($variable)
1212             #
1213             # Returns the name by which $symbol might be referenced. Removes any
1214             # extraneous leading '$' and folds to lower case unless CASE sensitivity
1215             # is set.
1216             #
1217             # Returns the (perhaps modified) variable name.
1218             #
1219             #========================================================================
1220              
1221             sub _variable_name {
1222 213     213   251 my $self = shift;
1223 213         267 my $variable = shift;
1224              
1225              
1226 213         535 $self->_DEBUG(DBGFUNC, "_variable_name($variable)\n");
1227              
1228              
1229             # strip leading '$'
1230 213         826 $variable =~ s/^\$//;
1231              
1232             # convert symbol to lower case unless CASE sensitivity is set
1233 213 50       591 $variable = lc $variable unless $self->{ CASE };
1234              
1235 213         463 return $variable;
1236             }
1237              
1238              
1239              
1240             #========================================================================
1241             #
1242             # _variable_value($variable, $tags)
1243             #
1244             # Returns the value associated with the variable as named in $variable.
1245             # $variable may be modified (by _variable_name()) which removes any
1246             # leading '$' and folding case unless $self->{ CASE } is set. The
1247             # resulting variable name is then used to index into $tags to return
1248             # the associated value.
1249             #
1250             # Returns the value from $tags associated with $variable or undef if not
1251             # defined.
1252             #
1253             #========================================================================
1254              
1255             sub _variable_value {
1256 123     123   171 my $self = shift;
1257 123         155 my $variable = shift;
1258 123         139 my $tags = shift;
1259              
1260              
1261 123         410 $self->_DEBUG(DBGFUNC, "_variable_value($variable, $tags)\n");
1262              
1263              
1264             # examine the CASEVARS which lists vars not for CASE folding
1265             return $tags->{ $variable }
1266 123 100 66     397 if (defined $self->{ CASEVARS }->{ $variable }
1267             && defined $tags->{ $variable });
1268              
1269             # special case(s)
1270 122 100       267 return time() if $variable eq 'TIME';
1271              
1272             # apply any case folding rules to the variable name
1273 120         219 $variable = $self->_variable_name($variable);
1274              
1275             # return the associated value
1276 120         611 return $tags->{ $variable };
1277             }
1278              
1279              
1280              
1281             #========================================================================
1282             #
1283             # _interpolate($expr, $tags)
1284             #
1285             # Examines the string expression, $expr, and attempts to replace any
1286             # elements within the string that relate to key names in the hash table
1287             # referenced by $tags. A simple "$variable" subsititution is identified
1288             # when separated by non-word characters
1289             #
1290             # e.g. "foo/$bar/baz" => "foo/" . $tags->{'bar'} . "/baz"
1291             #
1292             # Ambiguous variable names can be explicitly resolved using braces as per
1293             # Unix shell syntax.
1294             #
1295             # e.g. "foo${bar}baz" => "foo" . $tags{'bar'} . "baz"
1296             #
1297             # The function returns a newly constructed string. If $expr is a reference
1298             # to a scalar, the original scalar is modified and also returned.
1299             #
1300             #========================================================================
1301              
1302             sub _interpolate {
1303 152     152   301 my $self = shift;
1304 152         202 my $expr = shift;
1305 152   50     350 my $tags = shift || {};
1306 152         165 my ($s1, $s2);
1307              
1308              
1309 152         542 $self->_DEBUG(DBGFUNC, "_interpolate($expr, $tags)\n");
1310              
1311              
1312             # if a reference is passed, work on the original, otherwise take a copy
1313 152 50       396 my $work = ref($expr) eq 'SCALAR' ? $expr : \$expr;
1314              
1315             # look for a "$identifier" or "${identifier}" and substitute
1316             # Note that we save $1 and $2 because they may get trounced during
1317             # the call to $self->_variable_value()
1318 152         356 $$work =~ s/ ( \$ \{? ([\w\.]+) \}? ) /
1319 20         52 ($s1, $s2) = ($1, $2);
1320 20 50       47 defined ($s2 = $self->_variable_value($2, $tags))
1321             ? $s2
1322             : $s1;
1323             /gex;
1324              
1325             # return modified string
1326 152         506 $$work;
1327             }
1328              
1329              
1330              
1331             #========================================================================
1332             #
1333             # _integrate_params($tags, $params, $lookup)
1334             #
1335             # Attempts to incorporate all the variables in the $params hash array
1336             # reference into the current tagset referenced by $tags. Any embedded
1337             # variable references in the $params values will be interpolated using
1338             # the values in the $lookup hash. If $lookup is undefined, the $tags
1339             # hash is used.
1340             #
1341             # e.g.
1342             # if $params->{'foo'} = 'aaa/$bar/bbb'
1343             # then $tags->{'foo'} = 'aaa' . $lookup->{'bar'} . 'bbb'
1344             #
1345             #========================================================================
1346              
1347             sub _integrate_params {
1348 100     100   132 my $self = shift;
1349 100   50     235 my $tags = shift || {};
1350 100   50     208 my $params = shift || {};
1351 100   66     375 my $lookup = shift || $tags;
1352 100         122 my ($v, $variable, $value);
1353              
1354            
1355 100         441 $self->_DEBUG(DBGFUNC, "_integrate_params($tags, $params, $lookup)\n");
1356              
1357              
1358             # iterate through each variable in $params
1359 100         410 foreach $v (keys %$params) {
1360              
1361             # get the real variable name
1362 93         212 $variable = $self->_variable_name($v);
1363              
1364             # interpolate any variable values in the parameter value
1365 93         247 $value = $self->_interpolate($params->{ $v }, $lookup);
1366              
1367             # copy variable and value into new tagset
1368 93         329 $tags->{ $variable } = $value
1369             }
1370             }
1371              
1372              
1373              
1374             #========================================================================
1375             #
1376             # _substitute($directive, $tags)
1377             #
1378             # Examines the SUBST directive referenced by $directive and looks to
1379             # see if the variable to which it refers ($directive->{ IDENTIFIER })
1380             # exists as a key in the hash table referenced by $tags.
1381             #
1382             # If a relevant hash entry does not exist and $self->{ EXECUTE } is set
1383             # to a true value, _substitute attempts to run the directive name as a
1384             # class method, allowing derived (sub) classes to define member functions
1385             # that get called automagically by the base class. If $self->{ EXECUTE }
1386             # has a value > 1, it attempts to run a function in the main package with
1387             # the same name as the identifier. If all that fails, undef is returned.
1388             #
1389             #========================================================================
1390              
1391             sub _substitute {
1392 103     103   135 my $self = shift;
1393 103         118 my $directive = shift;
1394 103         117 my $tags = shift;
1395 103         154 my $ident = $directive->{ IDENTIFIER };
1396 103         118 my ($value, $fn);
1397              
1398              
1399 103         403 $self->_DEBUG(DBGFUNC, "_substitute($directive, $tags)\n");
1400              
1401              
1402             # get the variable value if it is defined
1403 103 100       1576 return $value
1404             if defined ($value = $self->_variable_value($ident, $tags));
1405              
1406             # nothing more to do unless EXECUTE is true
1407             return undef
1408 9 100       33 unless $self->{ EXECUTE };
1409              
1410             # extract the original parameter string
1411 4   50     21 my $prmstr = $directive->{ PARAMSTR } || '';
1412 4         7 my $prmhash = { };
1413              
1414             # create a new set of directive tags, interpolating any embedded vars
1415 4         12 $self->_integrate_params($prmhash, $directive->{ PARAMS }, $tags);
1416              
1417             # execute $ident class method if EXECUTE is defined and $ident exists
1418 4 100 66     61 if ($self->{ EXECUTE } && $self->can($ident)) {
1419 2         8 $self->_DEBUG(DBGINFO, "executing $self->$ident\n");
1420 2         10 return $self->$ident($prmhash, $prmstr)
1421             }
1422            
1423             # if EXECUTE is set > 1, we try to run it as a function in the main
1424             # package. We examine the main symbol table to see if the function
1425             # exists, otherwise we return undef.
1426              
1427 2 50       8 return undef unless $self->{ EXECUTE } > 1;
1428              
1429             # get a function reference from the main symbol table
1430 2         15 local *glob = $main::{ $ident };
1431             return undef
1432 2 50       8 unless defined($fn = *glob{ CODE });
1433              
1434 2         10 $self->_DEBUG(DBGINFO, "executing main::$ident\n");
1435              
1436             # execute the function and implicitly return result
1437 2         3 &{ $fn }($prmhash, $prmstr);
  2         7  
1438             }
1439              
1440              
1441              
1442             #========================================================================
1443             #
1444             # _evaluate($expr, \%tags, $delimiter)
1445             #
1446             # Evaluates the specified expression, $expr, using the token values in
1447             # the hash array referenced by $tags. The $delimiter parameter may also
1448             # be passed to over-ride the default delimiter ($self->{ DELIMITER })
1449             # which is used when splitting 'in' lists for evalutation
1450             # (e.g. if="name in Tom,Dick,Harry").
1451             #
1452             # Returns 1 if the expression evaluates true, 0 if it evaluates false.
1453             # On error (e.g. a badly formed expression), undef is returned.
1454             #
1455             # NOTE: This method is ugly, slow and buggy. For most uses, it will do
1456             # the job admirably, but don't necessarily trust it to do 100% what you
1457             # expect if your expressions start to get very complicated. In
1458             # particular, multiple nested parenthesis may not evaluate with the
1459             # correct precedence, or indeed at all. The method has to parse and
1460             # evaluate the $expr string every time it is run. This will start to
1461             # slow your processing down if you do a lot of conditional tests. In
1462             # the future, it is likely to be compiled down to an intermediate form
1463             # to improve execution speed.
1464             #
1465             #========================================================================
1466              
1467             sub _evaluate {
1468 54     54   77 my $self = shift;
1469 54         75 my $expr = shift;
1470 54         61 my $tags = shift;
1471 54   66     141 my $delim = shift || $self->{ DELIMITER };
1472 54         56 my ($lhs, $rhs, $sub, $op, $result);
1473              
1474             # save a copy of the original expression for debug purposes
1475 54         62 my $original = $expr;
1476              
1477             # a hash table of comparison operators and associated functions
1478             my $compare = {
1479 4     4   13 '==' => sub { $_[0] eq $_[1] },
1480 2     2   5 '=' => sub { $_[0] eq $_[1] },
1481 2     2   5 '!=' => sub { $_[0] ne $_[1] },
1482 8     8   26 '>=' => sub { $_[0] ge $_[1] },
1483 0     0   0 '<=' => sub { $_[0] le $_[1] },
1484 8     8   107 '>' => sub { $_[0] gt $_[1] },
1485 4     4   13 '<' => sub { $_[0] lt $_[1] },
1486 0     0   0 '=~' => sub { $_[0] =~ /$_[1]/ },
1487 0     0   0 '!~' => sub { $_[0] !~ /$_[1]/ },
1488 0     0   0 'in' => sub { grep(/^$_[0]$/, split(/$delim/, $_[1])) },
1489 54         887 };
1490             # define a regex to match the comparison keys; note that alpha words
1491             # (\w+) must be protected by "\b" boundary assertions and that order
1492             # is extremely important (so as to match '>=' before '>', for example)
1493 54         190 my $compkeys = join('|', qw( \bin\b <= >= < > =~ !~ != == = ));
1494              
1495             # a hash table of boolean operators and associated functions
1496             my $boolean = {
1497 2 50   2   11 '&&' => sub { $_[0] && $_[1] },
1498 0 0   0   0 '||' => sub { $_[0] || $_[1] },
1499 0     0   0 '^' => sub { $_[0] ^ $_[1] },
1500 4 100   4   17 'and' => sub { $_[0] and $_[1] },
1501 0 0   0   0 'or' => sub { $_[0] or $_[1] },
1502 0   0 0   0 'xor' => sub { $_[0] xor $_[1] },
1503 54         522 };
1504 324 100       1562 my $boolkeys = join('|',
1505 54         192 map { /^\w+$/ ? "\\b$_\\b" : "\Q$_" } keys %$boolean);
1506              
1507              
1508             # DEBUG code
1509 54         249 $self->_DEBUG(DBGFUNC, "_evaluate($expr, $tags)\n");
1510 54         152 foreach (keys %$tags) {
1511 213         548 $self->_DEBUG(DBGEVAL | DBGDATA, " eval: %-10s -> %s\n",
1512             $_, $tags->{ $_ });
1513             }
1514              
1515              
1516             # trounce leading and trailing whitespace
1517 54         114 foreach ($expr) {
1518 54         102 s/^\s+//;
1519 54         197 s/\s+$//g;
1520             }
1521              
1522 54         229 $self->_DEBUG(DBGEVAL, "EVAL: expr: [$expr]\n");
1523              
1524             # throw back expressions already fully simplified; note that we evaluate
1525             # expressions as strings to avoid implicit true/false evaluation
1526 54 100 100     233 if ($expr eq '1' or $expr eq '0') {
1527 16         52 $self->_DEBUG(DBGEVAL, "EVAL: fully simplified: $expr\n");
1528 16         282 return $expr;
1529             }
1530              
1531              
1532             #
1533             # fully expand all expressions in parenthesis
1534             #
1535              
1536 38         106 while ($expr =~ /(.*?)\(([^\(\)]+)\)(.*)/) {
1537 0         0 $lhs = $1;
1538 0         0 $sub = $2;
1539 0         0 $rhs = $3;
1540              
1541             # parse the parenthesised expression
1542 0 0       0 return undef unless defined($sub = $self->_evaluate($sub, $tags));
1543              
1544             # build a new expression
1545 0         0 $expr = "$lhs $sub $rhs";
1546             }
1547              
1548             # check there aren't any hanging parenthesis
1549 38 50       95 $expr =~ /[\(\)]/ && do {
1550 0         0 $self->_warn("Unmatched parenthesis: $expr\n");
1551 0         0 return undef;
1552             };
1553              
1554              
1555             #
1556             # divide expression by the first boolean operator
1557             #
1558              
1559 38 100       1179 if ($expr =~ /(.*?)\s*($boolkeys)\s*(.*)/) {
1560              
1561 6         11 $lhs = $1;
1562 6         12 $op = $2;
1563 6         8 $rhs = $3;
1564              
1565 6         37 $self->_DEBUG(DBGEVAL, "EVAL: boolean split: [$lhs] [$op] [$rhs]\n");
1566              
1567             # evaluate expression using relevant operator
1568 6 100       21 $result = &{ $boolean->{ $op } }(
  6         17  
1569             $lhs = $self->_evaluate($lhs, $tags),
1570             $rhs = $self->_evaluate($rhs, $tags)
1571             ) ? 1 : 0;
1572            
1573 6         30 $self->_DEBUG(DBGEVAL,
1574             "EVAL: bool: [$original] => [$lhs] [$op] [$rhs] = $result\n");
1575 6         99 return $result;
1576             }
1577              
1578              
1579             #
1580             # divide expression by the first comparitor
1581             #
1582              
1583 32         46 $lhs = $expr;
1584 32         51 $rhs = $op = '';
1585              
1586 32 100       364 if ($expr =~ /^\s*(.*?)\s*($compkeys)\s*(.*?)\s*$/) {
1587 28         50 $lhs = $1;
1588 28         43 $op = $2;
1589 28         43 $rhs = $3;
1590              
1591 28         106 $self->_DEBUG(DBGEVAL, "EVAL: compare: [$lhs] [$op] [$rhs]\n");
1592             }
1593              
1594             #
1595             # cleanup, rationalise and/or evaluate left-hand side
1596             #
1597              
1598             # left hand side is automatically dereferenced so remove any explicit
1599             # dereferencing '$' character at the start
1600 32         78 $lhs =~ s/^\$//;
1601              
1602             # convert lhs to lower case unless CASE sensitive
1603 32 50       90 $lhs = lc $lhs unless $self->{ CASE };
1604              
1605 32   100     147 $self->_DEBUG(DBGEVAL, "EVAL: expand lhs: \$$lhs => %s\n",
1606             $tags->{ $lhs } || "");
1607              
1608             # dereference the lhs variable
1609 32   100     95 $lhs = $tags->{ $lhs } || 0;
1610              
1611              
1612             #
1613             # no comparitor implies lhs is a simple true/false evaluated variable
1614             #
1615              
1616 32 100       61 unless ($op) {
1617 4 100       23 $self->_DEBUG(DBGEVAL, "EVAL: simple: [$lhs] = %s\n", $lhs ? 1 : 0);
1618 4 100       98 return $lhs ? 1 : 0;
1619             }
1620              
1621              
1622             #
1623             # de-reference RHS of the equation ($comp) if it starts with a '$'
1624             #
1625              
1626 28 100       71 if ($rhs =~ s/^\$(.*)/$1/) {
1627              
1628             # convert variable name to lower case unless CASE sensitive
1629 4 50       12 $rhs = lc $rhs unless $self->{ CASE };
1630              
1631 4   50     34 $self->_DEBUG(DBGEVAL, "EVAL: expand rhs: $rhs => %s\n",
1632             $tags->{ $rhs } || "");
1633              
1634             # de-reference variables
1635 4   50     12 $rhs = $tags->{ $rhs } || 0;
1636             }
1637             else {
1638 24         72 $self->_DEBUG(DBGEVAL, "EVAL: rhs: [$rhs]\n");
1639             }
1640              
1641             # remove surrounding quotes from rhs value
1642 28         52 foreach ($rhs) {
1643 28         50 s/^["']//;
1644 28         74 s/["']$//;
1645             }
1646              
1647             # force both LHS and RHS to lower case unless CASE sensitive
1648 28 50       65 unless ($self->{ CASE }) {
1649 28         42 $lhs = lc $lhs;
1650 28         34 $rhs = lc $rhs;
1651             }
1652              
1653              
1654             #
1655             # evaluate the comparison statement
1656             #
1657              
1658 28 100       29 $result = &{ $compare->{"\L$op"} }($lhs, $rhs) ? 1 : 0;
  28         70  
1659              
1660 28         70 $self->_DEBUG(DBGEVAL, "EVAL: comp: [%s] => [%s] [%s] [%s] = %s\n",
1661             $original, $lhs, $op, $rhs, $result);
1662              
1663 28         524 $result;
1664             }
1665              
1666              
1667              
1668             #========================================================================
1669             #
1670             # _post_process($directive, $string)
1671             #
1672             # This function is called to post-process the output generated when
1673             # process() conducts a SUBST or an INCLUDE operation. The FILTER and
1674             # FORMAT parameters of the directive, $directive, are used to indicate
1675             # the type of post-processing required.
1676             #
1677             # Returns the processed string.
1678             #
1679             #========================================================================
1680              
1681             sub _post_process {
1682 12     12   24 my $self = shift;
1683 12         19 my $directive = shift;
1684 12         19 my $line = shift;
1685 12         68 my $formats = {
1686             QUOTED => '"%s"',
1687             DQUOTED => '"%s"',
1688             SQUOTED => "'%s'",
1689             MONEY => "%P%.2f", # '%P' says "use printf() not time2str()"
1690             };
1691 12         1851 my ($pre, $post);
1692 0         0 my @lines;
1693              
1694              
1695             # DEBUG code
1696 12 50       74 if ($self->{ DEBUGLEVEL } & DBGFUNC) {
1697 0         0 my $dbgline = $line;
1698 0         0 $dbgline =~ s/\n/\\n/g;
1699 0         0 $dbgline =~ s/\t/\\t/g;
1700 0 0       0 substr($dbgline, 0, 16) = "..."
1701             if length $dbgline > 16;
1702 0         0 $dbgline = "\"$dbgline\"";
1703 0         0 $self->_DEBUG(DBGFUNC, "_post_process($directive, $dbgline)\n");
1704             }
1705 12         54 $self->_DEBUG(DBGPOST, "Post-process: \n[$line]\n");
1706              
1707              
1708             # no need to do anything if there's nothing to operate on
1709 12 50 33     77 return "" unless defined $line && length $line;
1710              
1711             # split into lines, accounting for a trailing newline which would
1712             # otherwise be ignored by split()
1713 12         299 @lines = split(/\n/, $line);
1714 12 50       46 push(@lines, "") if chomp($line);
1715              
1716              
1717 12         45 $self->_DEBUG(DBGPOST, " -> [%s]\n" , join("]\n [", @lines));
1718              
1719              
1720             # see if the "FILTER" option is specified
1721 12 100       40 if (defined($directive->{ FILTER })) {
1722              
1723             # extract the filter name and parameters: ()
1724 4         31 $directive->{ FILTER } =~ /([^(]+)(?:\((.*)\))?/;
1725 4         14 my $fltname = $1;
1726              
1727             # split filter parameters and remove enclosing quotes
1728 4   50     131 my @fltparams = split(/\s*,\s*/, $2 || "");
1729 4         12 foreach (@fltparams) {
1730 5         12 s/^"//;
1731 5         13 s/"$//;
1732             }
1733              
1734              
1735             # is there a filter function with the name specified?
1736 4 50       21 if (ref($self->{ FILTER }->{ $fltname }) eq 'CODE') {
1737              
1738 4         20 $self->_DEBUG(DBGINFO, "filter: $fltname(%s)\n",
1739             join(", ", $fltname, @fltparams));
1740              
1741             # deref filter code to speed up multi-line processing
1742 4         9 my $fltfn = $self->{ FILTER }->{ $fltname };
1743              
1744             # feed each line through filter function
1745 4         8 foreach (@lines) {
1746 8         12 $pre = $_;
1747 8         11416 $_ = &$fltfn($fltname, $_, @fltparams);
1748 8         14 $post = $_;
1749              
1750 8 50       43 if ($self->{ DEBUGLEVEL } & DBGPOST) {
1751 0         0 $self->_DEBUG(DBGDATA,
1752             "filter: [ $pre ]\n -> [ $post ]\n");
1753             }
1754             }
1755             }
1756             else {
1757 0         0 $self->_warn("$fltname: non-existant or invalid filter\n");
1758             }
1759             }
1760              
1761              
1762             #
1763             # if the "format=