File Coverage

blib/lib/Data/Rlist.pm
Criterion Covered Total %
statement 800 1098 72.8
branch 420 718 58.5
condition 109 206 52.9
subroutine 85 105 80.9
pod 67 80 83.7
total 1481 2207 67.1


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             # -*-cperl-*-
3            
4             =head1 VENUE
5            
6             Data::Rlist - A lightweight data language for Perl and C++
7            
8             =cut
9            
10             # $Writestamp: 2008-07-27 21:19:43 andreas$
11             # $Compile: perl -c Rlist.pm; pod2html --title="Random-Lists" Rlist.pm >../../Rlist.pm.html$
12             # $Comp1le: podchecker Rlist.pm$
13            
14             =head1 SYNOPSIS
15            
16             use Data::Rlist;
17            
18             File and string I/O for any Perl data F<$thing>:
19            
20             ### Compile data as text.
21            
22             WriteData $thing, $filename; # compile data into file
23             WriteData $thing, \$string; # compile data into buffer
24             $string_ref = WriteData $thing; # dto.
25            
26             $string = OutlineData $thing; # compile printable text
27             $string = StringizeData $thing; # compile text in a compact form (no newlines)
28             $string = SqueezeData $thing; # compile text in a super-compact form (no whitespace)
29            
30             ### Parse data from text.
31            
32             $thing = ReadData $filename; # parse data from file
33             $thing = ReadData \$string; # parse data from string buffer
34            
35             F>, F> etc. are L. Alternately we
36             use:
37            
38             ### Qualified functions to parse text.
39            
40             $thing = Data::Rlist::read($filename);
41             $thing = Data::Rlist::read($string_ref);
42             $thing = Data::Rlist::read_string($string_or_string_ref);
43            
44             ### Qualified functions to compile data into text.
45            
46             Data::Rlist::write($thing, $filename);
47             $string_ref = Data::Rlist::write_string($thing);
48             $string = Data::Rlist::write_string_value($thing);
49            
50             ### Print data to STDOUT.
51            
52             PrintData $thing;
53            
54             The object-oriented interface:
55            
56             ### For objects the '-output' attribute refers to a string buffer or is a filename.
57             ### The '-data' attribute defines the value or reference to be compiled into text.
58            
59             $object = new Data::Rlist(-data => $thing, -output => \$target)
60            
61             $string_ref = $object->write; # compile into $target, return \$target
62             $string_ref = $object->write_string; # compile into new string ($target not touched)
63             $string = $object->write_string_value; # dto. but return string value
64            
65             ### Print data to STDOUT.
66            
67             print $object->write_string_value;
68             print ${$object->write}; # returns \$target
69            
70             ### Set output file and write $thing to disk.
71            
72             $object->set(-output => ".foorc");
73            
74             $object->write; # write "./.foorc", return 1
75             $object->write(".barrc"); # write "./.barrc" (the filename overrides -output)
76            
77             ### The '-input' attribute defines the text to be compiled, either as
78             ### string reference or filename.
79            
80             $object->set(-input => \$input_string); # assign some text
81            
82             $thing = $object->read; # parse $input_string into Perl data
83             $thing = $object->read($other_string); # parse $other_string (the argument overrides -input)
84            
85             $object->set(-input => ".foorc"); # assign some input file
86            
87             $foorc = $object->read; # parse ".foorc"
88             $barrc = $object->read(".barrc"); # parse some other file
89             $thing = $object->read(\$string); # parse some string buffer
90             $thing = $object->read_string($string_or_ref); # dto.
91            
92             Create deep-copies of any Perl data. The metaphor "keelhaul" vividly connotes that F<$thing> is
93             stringified, then compiled back:
94            
95             ### Compile a value or ref $thing into text, then parse back into data.
96            
97             $reloaded = KeelhaulData $thing;
98             $reloaded = Data::Rlist::keelhaul($thing);
99            
100             $object = new Data::Rlist(-data => $thing);
101             $reloaded = $object->keelhaul;
102            
103             Do deep-comparisons of any Perl data:
104            
105             ### Deep-compare $a and $b and get a description of all type/value differences.
106            
107             @diffs = CompareData($a, $b);
108            
109             For more information see F>, F>, and F>.
110            
111             =head1 DESCRIPTION
112            
113             =head2 Venue
114            
115             F (Rlist) is a tag/value text format, which can "stringify" any data structure in
116             7-bit ASCII text. The basic types are lists and scalars. The syntax is similar, but not equal to
117             Perl's. For example,
118            
119             ( "hello", "world" )
120             { "hello" = "world"; }
121            
122             designates two lists, the first of which is sequential, the second associative. The format...
123            
124             - allows the definition of hierachical and constant data,
125            
126             - has no user-defined types, no keywords, no variables,
127            
128             - has no arithmetic expressions,
129            
130             - uses 7-bit-ASCII character encoding and escape sequences,
131            
132             - uses C-style numbers and strings,
133            
134             - has an extremely minimal syntax implementable in any programming language and system.
135            
136             You can write any Perl data structure into files as legible text. Like with CSV the lexical
137             overhead of Rlist is minimal: files are merely data.
138            
139             You can read compiled texts back in Perl and C++ programs. No information will be lost between
140             different program languages, and floating-point numbers keep their precision.
141            
142             You can also compile structured CSV text from Perl data, using special functions from this package
143             that will keep numbers precise and properly quote strings.
144            
145             Since Rlist has no user-defined types the data is structured out of simple scalars and lists. It
146             is conceivable, however, to develop a simple type system and store type information along with the
147             actual data. Otherwise the data structures are tacit consents between the users of the data. See
148             also the implemenation notes for L and L.
149            
150             =head2 Character Encoding
151            
152             Rlist text uses the 7-bit-ASCII character set. The 95 printable character codes 32 to 126 occupy
153             one character. Codes 0 to 31 and 127 to 255 require four characters each: the F<\> escape
154             character followed by the octal code number. For example, the German Umlaut character F>
155             (252) is translated into F<\374>. An exception are the following codes:
156            
157             ASCII ESCAPED AS
158             ----- ----------
159             9 tab \t
160             10 linefeed \n
161             13 return \r
162             34 quote " \"
163             39 quote ' \'
164             92 backslash \ \\
165            
166             =head2 Values and Default Values
167            
168             F are either scalars, array elements or the value of a pair. Each value is constant.
169            
170             The default scalar value is the empty string C<"">. So in Perl F is compiled into C<"">.
171            
172             =head2 Numbers, Strings and Here-Documents
173            
174             Numbers constants adhere to the IEEE 754 syntax for integer- and floating-point numbers (i.e., the
175             same lexical conventions as in C and C++ apply).
176            
177             Strings constants consisting only of C<[a-zA-Z_0-9-/~:.@]> characters "look like identifiers" (aka
178             symbols) need not to be quoted. Otherwise string constants follow the C language lexicography.
179             They strings must be placed in double-quotes (single-quotes are not allowed). Quoted strings are
180             also escaped (i.e., characters are converted to the input character set of 7-bit ASCII).
181            
182             You can define a string using a line-oriented form of quoting based on the UNIX shell
183             F syntax and RFC 111. Multiline quoted strings can be expressed with
184            
185             <
186            
187             Following the sigil F< << > an identifier specifies how to terminate the string scalar. The value
188             of the scalar will be all lines following the current line down to the line starting with the
189             delimiter (i.e., the delimiter must be at column 1). There must be no space between the sigil and
190             the identifier.
191            
192             B
193            
194             Quoted strings:
195            
196             "Hello, World!"
197            
198             Unquoted strings (symbols, identifiers):
199            
200             foobar cogito.ergo.sum Memento::mori
201            
202             Here-document strings:
203            
204             <
205             "This above all: to thine own self be true". - (Act I, Scene III).
206             hamlet
207            
208             Integegers and floats:
209            
210             38 10e-6 -.7 3.141592653589793
211            
212             For more information see F>, F> and F>.
213            
214             =head2 List Values
215            
216             We have two types of lists: sequential (aka array) and associative (aka map, hash, dictionary).
217            
218             B
219            
220             Arrays:
221            
222             ( 1, 2, ( 3, "Audiatur et altera pars!" ) )
223            
224             Maps:
225            
226             {
227             key = value;
228             standalone-key;
229             Pi = 3.14159;
230            
231             "meta-syntactic names" = (foo, bar, "lorem ipsum", Acme, ___);
232            
233             var = {
234             log = {
235             messages = <
236             Nov 27 21:55:04 localhost kernel: TSC appears to be running slowly. Marking it as unstable
237             Nov 27 22:34:27 localhost kernel: Uniform CD-ROM driver Revision: 3.20
238             Nov 27 22:34:27 localhost kernel: Loading iSCSI transport class v2.0-724.<6>PNP: No PS/2 controller found. Probing ports directly.
239             Nov 27 22:34:27 localhost kernel: wifi0: Atheros 5212: mem=0x26000000, irq=11
240             LOG
241             };
242             };
243             }
244            
245             =head2 Binary Data
246            
247             Binary data can be represented as base64-encoded string, or L
248             Here-Documents> string. For example,
249            
250             use MIME::Base64;
251            
252             $str = encode_base64($binary_buf);
253            
254             The result F<$str> will be a string broken into lines of no more than 76 characters each; the 76th
255             character will be a newline C<"\n">. Here is a complete Perl program that creates a file
256             F:
257            
258             use MIME::Base64;
259             use Data::Rlist;
260            
261             our $binary_data = join('', map { chr(int rand 256) } 1..300);
262             our $sample = { random_string => encode_base64($binary_data) };
263            
264             WriteData $sample, 'random.rls';
265            
266             These few lines create a file F containing text like the following:
267            
268             {
269             random_string = <<___
270             w5BFJIB3UxX/NVQkpKkCxEulDJ0ZR3ku1dBw9iPu2UVNIr71Y0qsL4WxvR/rN8VgswNDygI0xelb
271             aK3FytOrFg6c1EgaOtEudmUdCfGamjsRNHE2s5RiY0ZiaC5E5XCm9H087dAjUHPtOiZEpZVt3wAc
272             KfoV97kETH3BU8/bFGOqscCIVLUwD9NIIBWtAw6m4evm42kNhDdQKA3dNXvhbI260pUzwXiLYg8q
273             MDO8rSdcpL4Lm+tYikKrgCih9UxpWbfus+yHWIoKo/6tW4KFoufGFf3zcgnurYSSG2KRLKkmyEa+
274             s19vvUNmjOH0j1Ph0ZTi2pFucIhok4krJi0B5yNbQStQaq23v7sTqNom/xdRgAITROUIoel5sQIn
275             CqxenNM/M4uiUBV9OhyP
276             ___
277             ;
278             }
279            
280             Note that F> uses the predefined C<"default"> configuration, which enables here-doc
281             strings. See also L.
282            
283             =head2 Embedded Perl Code (Nanoscripts)
284            
285             Rlist text can define embedded Perl programs, called F. The embedded program text
286             has the form of a L with the special delimiter
287             C<"perl">. After the Rlist text has been parsed you call F> to F
288             all embedded Perl in the order of definiton. The function arranges it that within the F...
289            
290             =over
291            
292             =item *
293            
294             the F<$root> variable refers to the root of the input, as unblessed array- or hash-reference;
295            
296             =item *
297            
298             the F<$this> variable refers to the array or hash that stores the currently F'd nanoscript;
299            
300             =item *
301            
302             the F<$where> variable stores the name of the key, or the index, within F<$this>.
303            
304             =back
305            
306             The nanoscript can use this information to oriented itself within the parsed data, or even to
307             modify the data in-place. The result of F'ing will replace the nanoscript text. You can
308             also F the embedded Perl codes programmatically, using the F> and
309             F> functions.
310            
311             B
312            
313             Simple example of an Rlist text that hosts Perl code:
314            
315             (<
316             print "Hello, World!";
317             perl
318            
319             Here is a more complex example that defines a list of nanoscripts, and evaluates them:
320            
321             use Data::Rlist;
322            
323             $data = join('', );
324             $data = EvaluateData \$data;
325            
326             __END__
327             ( <
328             print "Hello World!\n" # english
329             perl
330             print "Hallo Welt!\n" # german
331             perl
332             print "Bonjour le monde!\n" # french
333             perl
334             print "Olá mundo!\n" # spanish
335             perl
336            
337             When we execute the above script the following output is printed before the script exits:
338            
339             Hello World!
340             Hallo Welt!
341             Bonjour le monde!
342             Olá mundo!
343            
344             Note that when the Rlist text after F<__END__> is placed in F, we can call
345             F)>> for the same effect. The next example modifies the parsed data
346             in place. Imagine a file F with the following content:
347            
348             ( <
349             ReadData(\\'{ foo = bar; }');
350             perl
351            
352             When we parse this file using
353            
354             $data = ReadData("this_file_modifies_itself");
355            
356             to F<$data> will be assigned the following Perl value
357            
358             [ "ReadData(\\'{ foo = bar; }');\n" ]
359            
360             Next we call F()> to "morph" this value into
361            
362             [ { 'foo' => 'bar' } ]
363            
364             The same effect can be achieved in just one call
365            
366             $data = EvaluateData("this_file_modifies_itself");
367            
368             =head2 Comments
369            
370             Rlist supports multiple forms of comments: F or F<#> single-line-comments, and F
371             multi-line-comments. You may use all three forms at will.
372            
373             =cut
374            
375             package Data::Rlist;
376            
377 10     10   183377 use strict;
  10         26  
  10         1027  
378 10     10   58 use warnings;
  10         20  
  10         389  
379 10     10   52 use Exporter;
  10         23  
  10         533  
380 10     10   53 use Carp;
  10         15  
  10         782  
381 10     10   63 use Scalar::Util qw/reftype/;
  10         19  
  10         1389  
382 10     10   10473 use integer;
  10         106  
  10         54  
383            
384 10         7399 use vars qw/$VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS
385            
386             $DEBUG
387             %PredefinedOptions
388             $RoundScientific $SafeCppMode $EchoStderr
389             $R $Fh $Locked $DefaultMaxDepth $MaxDepth $Depth
390             $Errors $Warnings $Broken $MissingInput @Messages
391             $DefaultCsvDelimiter $DefaultConfDelimiter $DefaultConfSeparator
392             $DefaultNanoscriptToken
393            
394             $REPunctuationCharacter $REIntegerHere $REFloatHere
395             $RESymbolCharacter $RESymbolHere $REStringHere
396             $REInteger $REFloat
397             $RESymbol $REString $REValue
398             @REIsPunct @REIsDigit
399 10     10   601 /;
  10         16  
400            
401             # Parser/lexer variables. Used by open_input, parse and lex. Declaring them as lexicals is
402             # slightly faster than to 'use vars'.
403            
404             my($Readstruct, $ReadFh, $Ln, $LnArray);
405             my(%Rules, @VStk, @NStk);
406            
407 10     10   60 use constant DEFAULT_VALUE => qq'""'; # default Rlist, the empty string
  10         27  
  10         32349  
408            
409             BEGIN {
410 10     10   30 $VERSION = '1.44';
411 10         21 $DEBUG = 0;
412 10         215 @ISA = qw/Exporter/;
413            
414             # Always exported (:DEFAULT) when the package is fetched with "use", not "required".
415            
416 10         176 @EXPORT = qw/ReadCSV WriteCSV
417             ReadConf WriteConf
418             ReadData EvaluateData WriteData
419             PrintData OutlineData StringizeData SqueezeData
420             KeelhaulData CompareData/;
421            
422             # Symbols exported on request.
423            
424 10         111 @EXPORT_OK = qw/:DEFAULT
425            
426             predefined_options complete_options
427            
428             maybe_quote7 quote7 escape7 unquote7 unescape7 unhere
429             is_value is_random_text is_symbol is_integer is_number
430             split_quoted parse_quoted
431            
432             equal round
433            
434             keelhaul deep_compare fork_and_wait synthesize_pathname
435            
436             $REInteger $REFloat $RESymbol/;
437            
438 10         318 %EXPORT_TAGS = (# Handle IEEE numbers
439             floats => [@EXPORT, qw/equal round is_number is_integer
440             /],
441             # Handle (quoted) strings
442             strings => [@EXPORT, qw/maybe_quote7 quote7 escape7
443             unquote7 unescape7
444             unhere split_quoted parse_quoted
445             is_value is_random_text is_number is_integer is_symbol
446             /],
447             # Compile options
448             options => [@EXPORT, qw/predefined_options complete_options
449             /],
450             # Auxiliary functions
451             aux => [@EXPORT, qw/keelhaul deep_compare fork_and_wait synthesize_pathname
452             /]);
453            
454 10         21 $MaxDepth = 0; $DefaultMaxDepth = 100; $Broken = 0;
  10         14  
  10         82  
455 10         18 $SafeCppMode = 0;
456 10         13 $EchoStderr = 0;
457 10         17 $RoundScientific = 0;
458 10         19 $DefaultConfSeparator = ' = ';
459 10         17 $DefaultConfDelimiter = '\s*=\s*';
460 10         17 $DefaultCsvDelimiter = '\s*,\s*';
461 10         30 $DefaultNanoscriptToken = 'perl';
462            
463 10         361 %PredefinedOptions =
464             (
465             default =>
466             {# Warning: "code_refs" are disabled by default because compile_fast() (the default compile
467             # function) never calls subs. Likewise the default "precision" must be undef!
468             eol_space => "\n",
469             bol_tabs => 1,
470             outline_hashes => 0,
471             outline_data => 6,
472             paren_space => '',
473             comma_punct => ', ',
474             semicolon_punct => ';',
475             assign_punct => ' = ',
476             here_docs => 1,
477             auto_quote => undef, # let write() and write_csv() choose their defaults
478             code_refs => 0,
479             scientific => 0,
480             separator => ',',
481             delimiter => undef,
482             precision => undef
483             },
484            
485             string =>
486             {
487             eol_space => '',
488             bol_tabs => 0,
489             outline_data => 0,
490             here_docs => 0
491             },
492            
493             outlined =>
494             {
495             eol_space => "\n",
496             bol_tabs => 1,
497             outline_hashes => 1,
498             outline_data => 1,
499             paren_space => ' ',
500             comma_punct => ', ',
501             },
502            
503             squeezed =>
504             {
505             bol_tabs => 0,
506             eol_space => '',
507             outline_hashes => 0,
508             outline_data => 0,
509             here_docs => 0,
510             code_refs => 0,
511             paren_space => '',
512             comma_punct => ',',
513             assign_punct => '=',
514             precision => 6,
515             }
516             );
517            
518             ########
519             # Regular expressions for scalars
520             #
521             # $RESymbolHere shall be defined equal to the 'identifier' regex in 'rlist.l', to keep the
522             # C/C++ and Perl implementations compatible. See also the C++ function quote() and the
523             # {identifier} rule in
524             #
525             # In Perl regexes, by default the "^" character matches only the beginning of the string, the
526             # "$" character only the end (or before the newline at the end). The "/s" modifier will force
527             # "^" to match only at the beginning of the string and "$" to match only at the end (or just
528             # before a newline at the end) of the string. "$" hence ignores an optional trailing newline.
529             #
530             # When "/m" is used this means for "foo\nbar" the "$" matches the end of the string (after "r")
531             # and also before every line break (between "o" and "\n"). Therefore we've to use "\z" which
532             # matches only at the end of the string.
533            
534 10         19 $REIntegerHere = '[+-]?\d+';
535 10         18 $REFloatHere = '(?:[+-]?)(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee](?:[+-]?\d+))?';
536 10         15 $REPunctuationCharacter = '\=\,;\{\}\(\)';
537 10         20 $RESymbolCharacter = 'a-zA-Z_0-9\-/\~:\.@';
538 10         37 $RESymbolHere = '[a-zA-Z_\-/\~:@]'.qq'[$RESymbolCharacter]*';
539 10         15 $REStringHere = '"[^"\\\r\n]*(?:\\.[^"\\\r\n]*)*"'; # " allowed inside the quotes, but only as \"
540            
541 10         464 $REInteger = qr/^$REIntegerHere\z/;
542 10         664 $REFloat = qr/^$REFloatHere\z/;
543 10         290 $RESymbol = qr/^$RESymbolHere\z/;
544 10         296 $REString = qr/^$REStringHere\z/;
545            
546 10         946 $REValue = qr/$REString|
547             $REInteger|
548             $REFloat|
549             $RESymbol/x;
550            
551 10         54 $REValue = qr/^$REStringHere\z|
552             ^$REIntegerHere\z|
553             ^$REFloatHere\z|
554             ^$RESymbolHere\z/x if 0; # disabled because it is slightly slower
555            
556             ########
557             # Rlist parser map:
558             #
559             # token => [ rule, deduce-function ]
560             # rule => [ rule, deduce-function ]
561             #
562             # See `lex()' for token meanings.
563            
564             sub syntax_error($;$) {
565 0   0 0 0 0 my($msg, $tr) = (shift, shift||'??');
566 0         0 $msg =~ s/\s/ /go; pr1nt('ERROR', $msg);
  0         0  
567 0         0 $Errors++; $tr
  0         0  
568             }
569             sub warning($;$) {
570 0   0 0 0 0 my($msg, $tr) = (shift, shift||'');
571 0         0 $msg =~ s/\s/ /go; pr1nt('WARNING', $msg);
  0         0  
572 0         0 $Warnings++; $tr
  0         0  
573             }
574            
575             %Rules =
576             (#
577             # Key/value pairs.
578             #
579             # For nanoscripts (n) push hash-ref, key and the script to @NStk.
580             #
581            
582 1         2 '{}' => sub { push @VStk, { }; 'v' },
  1         5  
583 1103         3825 '{h}' => sub { 'v' },
584             # first pairs (open the hash)
585 22         86 'v;' => sub { push @VStk, { pop(@VStk) => '' }; 'h' },
  22         87  
586 1081         3952 'v=v;' => sub { push @VStk, { splice @VStk, -2 }; 'h' },
  1081         3936  
587 1         5 'v=n;' => sub { my($k, $v) = splice @VStk, -2;
588 1         19 my $h = { $k => $v };
589 1         7 push @VStk, $h; push @NStk, [ $h, $k ]; 'h' },
  1         3  
  1         18  
590             # subsequent pairs (complete the hash)
591 456         917 'hv;' => sub { my $k = pop @VStk; $VStk[$#VStk]->{$k} = ''; 'h' },
  456         1453  
  456         1495  
592 4198         9407 'hv=v' => sub { my($k, $v) = splice @VStk, -2; $VStk[$#VStk]->{$k} = $v; 'h' },
  4198         13113  
  4198         14097  
593 0         0 'hv=n' => sub { my($k, $v) = splice @VStk, -2; $VStk[$#VStk]->{$k} = $v; push @NStk, [ $VStk[$#VStk], $k ]; 'h' },
  0         0  
  0         0  
  0         0  
594 4198         12418 'h;' => sub { 'h' },
595            
596             #
597             # Single values/scripts.
598             #
599            
600 1         2 '()' => sub { push @VStk, [ ]; 'v' },
  1         6  
601 1830         6063 '(l)' => sub { 'v' },
602 434         1211 '(v)' => sub { push @VStk, [pop(@VStk)]; 'v' },
  434         1298  
603 8         18 '(n)' => sub { my $v = pop @VStk; push @VStk, [ $v ]; push @NStk, [ $VStk[$#VStk], 0 ]; 'v' },
  8         21  
  8         20  
  8         33  
604 1828         5277 'v,' => sub { push @VStk, [pop(@VStk)]; 'l,' },
  1828         7342  
605 2         6 'n,' => sub { my $v = pop @VStk; push @VStk, [ $v ]; push @NStk, [ $VStk[$#VStk], 0 ]; 'l,' },
  2         6  
  2         9  
  2         11  
606 12676         25733 'l,v' => sub { my $v = pop @VStk; push @{$VStk[$#VStk]}, $v; 'l' }, # push to existing list
  12676         15355  
  12676         27960  
  12676         55316  
607 31         57 'l,n' => sub { my $v = pop @VStk; push @{$VStk[$#VStk]}, $v; push @NStk, [ $VStk[$#VStk], $#{$VStk[$#VStk]} ]; 'l' },
  31         42  
  31         91  
  31         60  
  31         156  
  31         137  
608            
609             #
610             # Rules for syntax errors. All rules containing '??' are error-recovery-rules.
611             #
612            
613 0         0 '=??' => sub { syntax_error("invalid value after '='", ';') },
614 0         0 '??;' => sub { syntax_error("invalid key/value before ';'", ';') },
615 0         0 ',??' => sub { push @VStk, ''; syntax_error("invalid value after ','", ',v') },
  0         0  
616 0         0 '??' => sub { '' },
617            
618 0         0 'vv' => sub { my($k, $v) = splice @VStk, -2; syntax_error("missing ',' or ';'") },
  0         0  
619 0         0 'v=v}' => sub { my($k, $v) = splice @VStk, -2; push @VStk, { $k => $v }; warning("unterminated pair: expected ';'", 'h}') },
  0         0  
  0         0  
620 0         0 'v=v,' => sub { my($k, $v) = splice @VStk, -2; warning("pair terminated with ',': expected ';'", '??') },
  0         0  
621 0         0 'v=;' => sub { warning("missing value, or superfluous '='", 'v;') },
622 0         0 'v=}' => sub { warning("missing value: expected ';', not '}'", 'v;') },
623 0         0 '(v}' => sub { my $v = pop @VStk; syntax_error("expected ')' after value, not '}'") },
  0         0  
624 0         0 '{v)' => sub { my $v = pop @VStk; syntax_error("expected '(' before value, not '{'") },
  0         0  
625 0         0 '{v}' => sub { my $k = pop @VStk; push @VStk, { $k => '' }; warning("unterminated pair: expected ';'", 'h') },
  0         0  
  0         0  
626            
627 0         0 '(v,)' => sub { warning("superfluous ',' at end of list", '(v)') },
628 0         0 '(l,)' => sub { warning("superfluous ',' at end of list", 'v') },
629            
630 0         0 '{{' => sub { warning("non-scalar hash-key", '??') },
631 0         0 '{(' => sub { warning("non-scalar hash-key", '??') },
632            
633 0         0 'n;' => sub { warning("nanoscript ignored: shall be def'd as value, not key", 'v;') },
634 0         0 'n=v;' => sub { warning("nanoscript ignored: shall be def'd as value, not key", 'v=v;') },
635 10         1040 );
636            
637             # True syntax errors, which cannot be converted into valid rules. The error will be printed
638             # and recorded in @Messages when '??' is actually reduced.
639            
640 10         62 foreach my $errrule ((',,', ',;', ';,', ';;',
641             '{=', '{,', '{;',
642             '(=', '(,', '(;',
643             '==',
644             '(v;', '(n;',
645             'v=,', 'v=)')) {
646 150 50       413 die if exists $Rules{$errrule};
647 150         39871 $Rules{$errrule} = eval(<<___);
648             sub { my \@r = map { s/\\s+/ /g; \$_ } map { if (/[vnhl]/) { pop(\@VStk) }; s/v/value/; s/n/nanoscript/; s/h/hash/; s/l/list/; \$_ }
649             split / */, '$errrule';
650             return syntax_error("'".join(' ', \@r)."'"); }
651             ___
652             }
653            
654 10         29 my($rule_max, $rule_min) = (0, 9);
655 10         108 foreach (keys %Rules) {
656 500 100       839 $rule_min = length($_) if length($_) < $rule_min;
657 500 100       1171 $rule_max = length($_) if length($_) > $rule_max;
658             }
659 10 50       74 die $rule_min if $rule_min != 2;
660 10 50       72644 die $rule_max if $rule_max != 4;
661             }
662            
663             sub pr1nt(@)
664             {
665             # This function is used to write a new comment line (usually some sort of error message) into
666             # the currently compiled file, and to STDERR (if $Data::Rlist::DEBUG).
667            
668 1     1 0 2 my $label = shift;
669 4         15 my $msg = join(': ', grep { length }
  2         25  
670             ($label,
671             ((defined($Readstruct) &&
672             exists $Readstruct->{filename}) ? $Readstruct->{filename}."($.)" : ""),
673 1 50 33     7 grep { defined } @_))."\n";
674 1 50       5 foreach my $fh (grep { defined } ($Fh, $EchoStderr ? *STDERR{IO} : undef)) {
  2         6  
675 0 0       0 next unless defined $fh;
676 0 0       0 print $fh map { $fh == defined($Fh) ? "# $_" : $_ } $msg;
  0         0  
677             }
678 1         3 push @Messages, $msg;
679             }
680            
681             =head1 PACKAGE INTERFACE
682            
683             The core functions to cultivate package objects are F>, F>, F> and
684             F>. When a regular package function is called in object context some omitted arguments are
685             read from object attributes. This is true for the following functions: F>, F>,
686             F>, F>, F>, F>, F>,
687             F> and F>.
688            
689             Unless called in object context the first argument has an indifferent meaning (i.e., it is no
690             F reference). Then F> expects an input file or string, F> the data
691             to compile etc.
692            
693             =head2 Construction
694            
695             =over
696            
697             =item F
698            
699             Create a F object from the hash ATTRIBUTES. For example,
700            
701             $self = Data::Rlist->new(-input => 'this.dat',
702             -data => $thing,
703             -output => 'that.dat');
704            
705             For this object the call Fread()|/read>> reads from F, and
706             Fwrite()|/write>> writes any Perl data F<$thing> to F.
707            
708             B
709            
710             =over 8
711            
712             =item C<-input =E INPUT>
713            
714             =item C<-filter =E FILTER>
715            
716             =item C<-filter_args =E FILTER-ARGS>
717            
718             Defines what Rlist text to parse and how to preprocess an input file. INPUT is a filename or
719             string reference. FILTER can be 1 to select the standard C preprocessor F. These attributes
720             are applied by F>, F>, F> and F>.
721            
722             =item C<-data =E DATA>
723            
724             =item C<-options =E OPTIONS>
725            
726             =item C<-output =E OUTPUT>
727            
728             Defines the Perl data to be L into text (DATA), how it shall be compiled
729             (OPTIONS) and where to store the compiled text (OUTPUT). When OUTPUT is string reference the
730             compiled text will be stored in that string. When OUTPUT is F a new string is created.
731             When OUTPUT is a string value it is a filename. These attributes are applied by F>,
732             F>, F>, F> and F>.
733            
734             =item C<-header =E HEADER>
735            
736             Defines an array of text lines, each of which will by prefixed by a F<#> and then written at the
737             top of the output file.
738            
739             =item C<-delimiter =E DELIMITER>
740            
741             Defines the field delimiter for F<.csv>-files. Applied by F> and F>.
742            
743             =item C<-columns =E STRINGS>
744            
745             Defines the column names for F<.csv>-files to be written into the first line.
746            
747             =back
748            
749             B
750            
751             The attributes listed below raise new values for package globals for the time an object method
752             runs.
753            
754             =over
755            
756             =item C<-InputRecordSeparator =E FLAG>
757            
758             Masquerades F<$/>, which affects how lines are read and written to and from Rlist- and CSV-files.
759             You may also set F<$/> by yourself. See L and L.
760            
761             =item C<-MaxDepth =E INTEGER>
762            
763             =item C<-SafeCppMode =E FLAG>
764            
765             =item C<-RoundScientific =E FLAG>
766            
767             Masquerade F>, F>
768             and F>.
769            
770             =item C<-EchoStderr =E FLAG>
771            
772             Print read errors and warnings message on STDERR (default: off).
773            
774             =item C<-DefaultCsvDelimiter =E REGEX>
775            
776             =item C<-DefaultConfDelimiter =E REGEX>
777            
778             Masquerades F<$Data::Rlist::DefaultCsvDelimiter> and F<$Data::Rlist::DefaultConfDelimiter>. These
779             globals define the default regexes to use when the F<-options> attribute does not specifiy the
780             L|/Compile Options> regex. Applied by F> and F>.
781            
782             =item C<-DefaultConfSeparator =E STRING>
783            
784             Masquerades F<$Data::Rlist::DefaultConfSeparator>, the default string to use when the F<-options>
785             attribute does not specifiy the L|/Compile Options> string. Applied by
786             F>.
787            
788             =back
789            
790             =item F
791            
792             Localize object SELF within the package and run SUB. This means that some of SELF's attribute
793             masqquerade few package globals for the time SUB runs. SELF then locks the package, and
794             F<$Data::Rlist::Locked> is greater than 0.
795            
796             =back
797            
798             =head2 Attribute Access
799            
800             =over
801            
802             =item F
803            
804             Reset or initialize object attributes, then return SELF. Each ATTRIBUTE is a name/value-pair. See
805             F> for a list of valid names. For example,
806            
807             $obj->set(-input => \$str, -output => 'temp.rls', -options => 'squeezed');
808            
809             =item F
810            
811             =item F
812            
813             =item F
814            
815             Get some attribute NAME from object SELF. Unless NAME exists returns DEFAULT. The F
816             method has no default value, hence it dies unless NAME exists. F returns true when NAME
817             exists, false otherwise. For NAME the leading hyphen is optional. For example,
818            
819             $self->get('foo'); # returns $self->{-foo} or undef
820             $self->get(-foo=>); # dto.
821             $self->get('foo', 42); # returns $self->{-foo} or 42
822            
823             =back
824            
825             =cut
826            
827             sub new {
828 139     139 1 8359 my($prototype, $k) = shift;
829 139 50       533 carp <<___ if @_ & 1;
830 0         0 $prototype->Data::Rlist::new(${\(join(', ', @_))})
831             odd number of arguments supplied, expecting key/value pairs
832             ___
833 139         556 my %args = @_;
834 139   33     392 bless { map { $k = $_;
  293         730  
835 293         547 s/^_+//o; # remove leading underscores
836 293         630 s/^([^\-])/-$1/o; # prepend missing '-'
837 293         1914 $_ => $args{$k}
838             } keys %args }, ref($prototype) || $prototype;
839             }
840            
841             sub set {
842 758     758 1 8104 my($self) = shift;
843 758         1996 my %attr = @_;
844 758         2618 while(my($k, $v) = each %attr) {
845 858         3418 $self->{$k} = $v
846             } $self
847 758         1474 }
848            
849             sub require($$) { # get attribute or confess
850 134     134 1 239 my($self, $attr) = @_;
851 134         285 my $v = $self->get($attr);
852 134 50       349 confess "$self->require(): missing '$attr' attribute:\n\t\t".join("\n\t\t", map { "$_ = $self->{$_}" } keys %$self) unless defined $v;
  0         0  
853 134         417 return $v;
854             }
855            
856             sub get($$;$) { # get attribute or return default value/undef
857 1537     1537 1 3927 my($self, $attr, $default) = @_;
858 1537 100       4762 $attr = '-'.$attr unless $attr =~ /^-/;
859 1537 100       6269 return $self->{$attr} if exists $self->{$attr};
860 552         1341 return $default;
861             }
862            
863             sub has($$) {
864 3788     3788 1 6405 my($self, $attr) = @_;
865 3788 50       11377 $attr = '-'.$attr unless $attr =~ /^-/;
866 3788         14853 exists $self->{$attr};
867             }
868            
869             sub dock($\&) {
870 377 50   377 1 1369 carp "package Data::Rlist locked" if $Locked++; # TODO: use critical sections and atomic increment
871 377         633 my ($self, $block) = @_;
872 377 50       1453 local $MaxDepth = $self->get(-MaxDepth=>) if $self->has(-MaxDepth=>);
873 377 50       2090 local $SafeCppMode = $self->get(-SafeCppMode=>) if $self->has(-SafeCppMode=>);
874 377 50       1230 local $EchoStderr = $self->get(-EchoStderr=>) if $self->has(-EchoStderr=>);
875 377 50       1202 local $RoundScientific = $self->get(-RoundScientific=>) if $self->has(-RoundScientific=>);
876 377 50       1418 local $DefaultCsvDelimiter = $self->get(-DefaultCsvDelimiter=>) if $self->has(-DefaultCsvDelimiter=>);
877 377 50       1154 local $DefaultConfDelimiter = $self->get(-DefaultConfDelimiter=>) if $self->has(-DefaultConfDelimiter=>);
878 377 50       1322 local $DefaultConfSeparator = $self->get(-DefaultConfSeparator=>) if $self->has(-DefaultConfSeparator=>);
879 377 50       1556 local $DefaultNanoscriptToken = $self->get(-DefaultNanoscriptToken=>) if $self->has(-DefaultNanoscriptToken=>);
880 377 100       1182 local $DEBUG = $self->get(-DEBUG=>) if $self->has(-DEBUG=>);
881 377 50       1139 local $/ = $self->get(-InputRecordSeparator=>) if $self->has(-InputRecordSeparator=>);
882 377         632 local $R;
883 377 100       1277 unless (defined wantarray) { # void context
    50          
884 115         272 $block->(); --$Locked;
  115         745  
885             } elsif (wantarray) {
886 0         0 my @r = $block->(); --$Locked; return @r;
  0         0  
  0         0  
887             } else {
888 262         603 my $r = $block->(); --$Locked; return $r;
  262         1557  
  262         3283  
889             }
890             }
891            
892             =head2 Public Functions
893            
894             =over
895            
896             =item F
897            
898             Parse data from INPUT, which specifies some Rlist-text. See also F>, F>.
899            
900             B
901            
902             INPUT shall be either
903            
904             - some Rlist object created by F>,
905            
906             - a string reference, in which case F and F> parse Rlist text from it,
907            
908             - a string scalar, in which case F assumes a file to parse.
909            
910             See F> for the FILTER and FILTER-ARGS parameters, which are used to preprocess an
911             input file. When an input file cannot be F'd and F'd this function dies. When INPUT
912             is an object, arguments for FILTER and FILTER-ARGS eventually override the F<-filter> and
913             F<-filter_args> attributes.
914            
915             B
916            
917             The parsed data as array- or hash-reference, or F if there was no data. The latter may also
918             be the case when file consist only of comments/whitespace.
919            
920             B
921            
922             This function may die. Dying is Perl's mechanism to raise exceptions, which eventually can be
923             catched with F. For example,
924            
925             my $host = eval { use Sys::Hostname; hostname; } || 'some unknown machine';
926            
927             This code fragment traps the F exception, so that F returns F or the result of
928             calling F. The following example uses F to trap exceptions thrown by F:
929            
930             $object = new Data::Rlist(-input => $thingfile);
931             $thing = eval { $object->read };
932            
933             unless (defined $thing) {
934             if ($object->errors) {
935             print STDERR "$thingfile has syntax errors"
936             } else {
937             print STDERR "$thingfile not found, is locked or empty"
938             }
939             } else {
940             # Can use $thing
941             .
942             .
943             }
944            
945             =item F
946            
947             =item F
948            
949             Parse data from INPUT, which specifies some comma-separated-values (CSV) text. Both functions
950            
951             - read data from strings or files,
952            
953             - use an optional delimiter,
954            
955             - ignore delimiters in quoted strings,
956            
957             - ignore empty lines,
958            
959             - ignore lines begun with F<#>.
960            
961             F is a variant of F dedicated to configuration files. Such files consist
962             of lines of the form
963            
964             key = value
965            
966             B
967            
968             For INPUT see F>. For FILTER, FILTER-ARGS see F>.
969            
970             OPTIONS can be used to override the L|/Compile Options> regex. For example, a
971             delimiter of C<'\s+'> splits the line at horizontal whitespace into multiple values (with respect
972             of quoted strings). For F the delimiter defaults to C<'\s*,\s*'>, and for F
973             to C<'\s*=\s*'>. See also F> and F>.
974            
975             B
976            
977             Both functions return a list of lists. Each embedded array defines the fields in a line.
978            
979             B
980            
981             Un/quoting of values happens implicitly. Given a file F
982            
983             # Comment
984             SERVER = hostname
985             DATABASE = database_name
986             LOGIN = "user,password"
987            
988             the call F<$opts=ReadConf(C<"db.conf">)> assigns
989            
990             [ [ 'SERVER', 'hostname' ],
991             [ 'DATABASE', 'database_name' ],
992             [ 'LOGIN', 'user,password' ]
993             ]
994            
995             The F> function can be used to create or update the configuration:
996            
997             push @$opts, [ 'MAGIC VALUE' => 3.14_15 ];
998            
999             WriteConf('db.conf', { precision => 2 });
1000            
1001             This writes to F:
1002            
1003             SERVER = hostname
1004             DATABASE = database_name
1005             LOGIN = "user,password"
1006             "MAGIC VALUE" = 3.14
1007            
1008             =item F
1009            
1010             Calls F> to parse Rlist language productions from the string or string-reference INPUT.
1011             When INPUT is an object do this for its F<-input> attribute.
1012            
1013             =item F
1014            
1015             Return the last result of calling F>, which is either F or some array- or
1016             hash-reference. When SELF is passed as object reference, returns the result that occured the last
1017             time SELF had called F>.
1018            
1019             =item F
1020            
1021             In list context return an array of nanoscripts defined by the last call to F>. When SELF
1022             is passed return this information for the last time SELF had called F>. The result has the
1023             form:
1024            
1025             ( [ $hash_or_array_ref, $key_or_index ], # 1st nanoscript
1026             [ $hash_or_array_ref, $key_or_index ], # 2nd nanoscript
1027             .
1028             .
1029             .
1030             )
1031            
1032             In scalar context return a reference to the above. This information defines the location of all
1033             embedded Perl scripts within the result, and can be used to F them programmatically. See
1034             also F>, F>.
1035            
1036             =item F
1037            
1038             Evaluates all nanoscripts defined by the last call to F>. When called as method evaluates
1039             the nanoscripts defined by the last time SELF had called F>. Returns the number of
1040             scripts or 0 if none were available. Each script is replaced by the result of F'ing it.
1041             (For details and examples see L.)
1042            
1043             =item F
1044            
1045             In list context returns a list of compile-time messages that occurred in the last call to
1046             F>. In scalar context returns an array reference. When an package object SELF is passed
1047             returns the information for the last time SELF had called F>.
1048            
1049             =item F
1050            
1051             =item F
1052            
1053             Returns the number of syntax errors and warnings that occurred in the last call to F>.
1054             When called as method returns the number that occured the last time SELF had called F>.
1055            
1056             Example:
1057            
1058             use Data::Rlist;
1059            
1060             our $data = ReadData 'things.rls';
1061            
1062             if (Data::Rlist::errors() || Data::Rlist::warnings()) {
1063             print join("\n", Data::Rlist::messages())
1064             } else {
1065             # Ok, $data is an array- or hash-reference.
1066             die unless $data;
1067            
1068             }
1069            
1070             =item F
1071            
1072             Returns the number of times the last F> violated F
1073             Data>>. When called as method returns the information for the last time SELF had called
1074             F>.
1075            
1076             =item F
1077            
1078             Returns true when the last call to F> yielded F, because there was nothing to
1079             parse. When called as method returns the information for the last time SELF had called
1080             F>.
1081            
1082             =item F
1083            
1084             Transliterates Perl data into Rlist text and write the text to a file or string buffer. F
1085             is auto-exported as F>.
1086            
1087             B
1088            
1089             DATA is either an object generated by F>, or any Perl data including F. In case of
1090             an object the actual DATA value is defined by its F<-data> attribute. (When F<-data> refers to
1091             another Rlist object, this other object is invoked.)
1092            
1093             OUTPUT defines the output location, as filename, string-reference or F. When F the
1094             function allocates a string and returns a reference to it. OUTPUT defaults to the F<-output>
1095             attribute when DATA defines an object.
1096            
1097             OPTIONS define how to compile DATA: when F or C<"fast"> uses F>, when
1098             C<"perl"> uses F>, otherwise F>. Defaults to the F<-options>
1099             attribute when DATA is an object.
1100            
1101             HEADER is a reference to an array of strings that shall be printed literally at the top of an
1102             output file. Defaults to the F<-header> attribute when DATA is an object.
1103            
1104             B
1105            
1106             When F creates a file it returns 0 for failure or 1 for success. Otherwise it returns a
1107             string reference.
1108            
1109             B
1110            
1111             $self = new Data::Rlist(-data => $thing, -output => $output);
1112            
1113             $self->write; # Compile $thing into a file ($output is a filename)
1114             # or string ($output is a string reference).
1115            
1116             Data::Rlist::write($thing, $output); # dto., but using the functional interface.
1117            
1118             =item F
1119            
1120             =item F
1121            
1122             Write DATA as comma-separated-values (CSV) to file or string OUTPUT. F writes
1123             configuration files where each line contains a tagname, a separator and a value.
1124            
1125             B
1126            
1127             DATA is either an object, or defines the data to be compiled as reference to an array of arrays.
1128             F uses only the first and second fields. For example,
1129            
1130             [ [ a, b, c ], # fields of line 1
1131             [ d, e, f, g ], # fields line 2
1132             .
1133             .
1134             ]
1135            
1136            
1137            
1138             OPTIONS specifies the comma-separator (C<"separator">), how to quote (C<"auto_quote">), the
1139             linefeed (C<"eol_space">) and the numeric precision (C<"precision">). COLUMNS specifies the column
1140             names to be written to the first line. Likewise the text from the HEADER array is written in form
1141             of F<#>-comments at the top of an output file.
1142            
1143             B
1144            
1145             When a file was created both function return 0 for failure, or 1 for success. Otherwise they
1146             return a reference to the compiled text.
1147            
1148             B
1149            
1150             Functional interface:
1151            
1152             use Data::Rlist; # imports WriteCSV
1153            
1154             WriteCSV($thing, "foo.dat");
1155            
1156             WriteCSV($thing, "foo.dat", { separator => '; ' }, [qw/GBKNR VBKNR EL LaD/]);
1157            
1158             WriteCSV($thing, \$target_string);
1159            
1160             $string_ref = WriteCSV($thing);
1161            
1162             Object-oriented interface:
1163            
1164             $object = new Data::Rlist(-data => $thing, -output => "foo.dat",
1165             -options => { separator => '; ' },
1166             -columns => [qw/GBKNR VBKNR EL LaD LaD_V/]);
1167            
1168             $object->write_csv; # write $thing as CSV to foo.dat
1169             $object->write; # write $thing as Rlist to foo.dat
1170            
1171             $object->set(-output => \$target_string);
1172            
1173             $object->write_csv; # write $thing as CSV to $target_string
1174            
1175             See also F> and F>.
1176            
1177             =item F
1178            
1179             Stringify any Perl data and return a reference to the string. Works like F> but always
1180             compiles to a new string to which it returns a reference. The default for OPTIONS will be
1181             L|/Predefined Options>.
1182            
1183             =item F
1184            
1185             Stringify any Perl dats and return the compiled text string value. OPTIONS default to
1186             L|/Predefined Options>. For example,
1187            
1188             print "\n\$thing dumped: ", Data::Rlist::write_string_value($thing);
1189            
1190             $self = new Data::Rlist(-data => $thing);
1191            
1192             print "\nsame \$thing dumped: ", $self->write_string_value;
1193            
1194             =item F
1195            
1196             Do a deep copy of DATA according to L. First the function compiles DATA
1197             to Rlist text, then restores the data from exactly this text. This process is called "keelhauling
1198             data", and allows us to
1199            
1200             - adjust the accuracy of numbers,
1201            
1202             - break circular-references,
1203            
1204             - drop F<\*foo{THING}>s,
1205            
1206             - bring multiple data sets to the same, common basis.
1207            
1208             It is useful (e.g.) when DATA had been hatched by some other code, and you don't know whether it
1209             is hierachical, or if typeglob-refs nist inside. Then keelhaul it to clean it from its past. For
1210             example, to bring all numbers in
1211            
1212             $thing = { foo => [ [ .00057260 ], -1.6804e-4 ] };
1213            
1214             to a certain accuracy, use
1215            
1216             $deep_copy_of_thing = Data::Rlist::keelhaul($thing, { precision => 4 });
1217            
1218             All number scalars in F<$thing> are rounded to 4 decimal places, so they're finally comparable as
1219             floating-point numbers. To F<$deep_copy_of_thing> is assigned the hash-reference
1220            
1221             { foo => [ [ 0.0006 ], -0.0002 ] }
1222            
1223             Likewise one can convert all floats to integers:
1224            
1225             $make_integers = new Data::Rlist(-data => $thing, -options => { precision => 0 });
1226            
1227             $thing_without_floats = $make_integers->keelhaul;
1228            
1229             When F> is called in an array context it also returns the text from which the copy had
1230             been built. For example,
1231            
1232             $deep_copy = Data::Rlist::keelhaul($thing);
1233            
1234             ($deep_copy, $rlist_text) = Data::Rlist::keelhaul($thing);
1235            
1236             $deep_copy = new Data::Rlist(-data => $thing)->keelhaul;
1237            
1238             B
1239            
1240             F> won't throw F nor return an error, but be prepared for the following effects:
1241            
1242             =over
1243            
1244             =item *
1245            
1246             F, F, F and F references were compiled, whether blessed or not. (Since
1247             compiling does not store type information, F will turn blessed references into barbars
1248             again.)
1249            
1250             =item *
1251            
1252             F, F and F references have been converted into strings.
1253            
1254             =item *
1255            
1256             Depending on the compile options, F references are invoked, deparsed back into their function
1257             bodies, or dropped.
1258            
1259             =item *
1260            
1261             Depending on the compile options floats are rounded, or are converted to integers.
1262            
1263             =item *
1264            
1265             F'd array elements are converted into the default scalar value C<"">.
1266            
1267             =item *
1268            
1269             Unless F<$Data::Rlist::MaxDepth> is 0, anything deeper than F<$Data::Rlist::MaxDepth> will be
1270             thrown away.
1271            
1272             =item *
1273            
1274             When the data contains objects, no special methods are triggered to "freeze" and "thaw" the
1275             objects.
1276            
1277             =back
1278            
1279             See also F> and F>
1280            
1281             =back
1282            
1283             =head2 Static Functions
1284            
1285             =over
1286            
1287             =item F
1288            
1289             Return are predefined hash-reference of compile otppns. PREDEF-NAME defaults to
1290             L|/Predefined Options>.
1291            
1292             =item F
1293            
1294             Completes OPTIONS with BASICS, so that all pairs not already in OPTIONS are copied from BASICS.
1295             Always returns a new hash-reference, i.e., neither OPTIONS nor BASICS are modified. Both arguments
1296             define hashes or some L. BASICS defaults to
1297             L|/Predefined Options>. For example,
1298            
1299             $options = complete_options({ precision => 0 }, 'squeezed')
1300            
1301             merges the predefined options for L text|/Predefined Options> with a numeric
1302             precision of 0 (converts all floats to integers).
1303            
1304             =back
1305            
1306             =cut
1307            
1308             sub is_integer(\$);
1309             sub is_number(\$);
1310             sub is_symbol(\$);
1311             sub is_random_text(\$);
1312            
1313             sub read($;$$);
1314             sub read($;$$) {
1315 627     627 1 8369 my($input, $fcmd, $fcmdargs) = @_;
1316            
1317 627 100       1892 if (ref($input) eq __PACKAGE__) {
1318             $input->dock(sub {
1319 134 50   134   381 unless ($fcmd) {
1320 134         681 $fcmd = $input->get('-filter');
1321 134         353 $fcmdargs = $input->get('-filter_args');
1322             }
1323 134         550 $R = Data::Rlist::read($input->require(-input=>), $fcmd, $fcmdargs); # returns a reference
1324 134         1215 $input->set(-read_result => [$Warnings, $Errors, $Broken, $MissingInput, \@Messages]);
1325 134 100       729 $input->set(-nanoscripts => (@NStk ? [@NStk] : undef));
1326 134         450 $input->set(-result => $R);
1327 134         304 $R
1328             }
1329             )
1330 134         1281 } else {
1331             # $input is either a string (filename) or reference.
1332 493 50       1307 local $| = 1 if $DEBUG;
1333 493 50       4295 if ($DEBUG) {
1334 0 0 0     0 print STDERR "Data::Rlist::open_input($input, $fcmd, $fcmdargs)\n" if $fcmd && $fcmdargs;
1335 0 0 0     0 print STDERR "Data::Rlist::open_input($input, $fcmd)\n" if $fcmd && !$fcmdargs;
1336 0 0       0 print STDERR "Data::Rlist::open_input($input)\n" unless $fcmd;
1337             }
1338 493 100       3305 return undef unless open_input($input, $fcmd, $fcmdargs);
1339 492 50       1181 confess unless defined $Readstruct;
1340 492         1289 my $data = parse();
1341 492 0       1187 print STDERR "Data::Rlist::close_input() parser result = ", (defined $data) ? $data : 'undef', "\n" if $DEBUG;
    50          
1342 492         1446 close_input();
1343 492         5056 return $data;
1344             }
1345             }
1346            
1347             sub read_csv($;$$$);
1348             sub read_csv($;$$$) {
1349 48     48 1 123 my($input, $options, $fcmd, $fcmdargs) = @_;
1350            
1351 48 100       321 if (ref($input) eq __PACKAGE__) {
1352             $input->dock
1353             (sub {
1354 24   66 24   168 $options ||= $input->get('options');
1355 24   33     97 $fcmd ||= $input->get('filter');
1356 24   33     87 $fcmdargs ||= $input->get('filter_args');
1357 24         58 $input = $input->get('input');
1358 24         75 Data::Rlist::read_csv($input, $options, $fcmd, $fcmdargs);
1359 24         222 });
1360             } else {
1361             # Call open_input, let lexln read all lines, call close_input. $input names a file or a
1362             # string-ref (buffer); from both we're reading linewise. For strings open_input does not
1363             # call read_csv, but splits at LF or CR+LF. Since lexln only chomps $/ we explicitly check
1364             # for a trailing \r here.
1365            
1366 24 50       101 return undef unless open_input($input, $fcmd, $fcmdargs);
1367 24 50       72 confess unless defined $Readstruct;
1368 24   66     73 my $delim = complete_options($options)->{delimiter} || $DefaultCsvDelimiter;
1369 24         87 my @L; push @L, $Ln while lexln();
  24         107  
1370 24         47 my @R; push @R, map { [ map { maybe_unquote7($_) } split_quoted($_, $delim) ] }
  48         206  
  7752         18011  
  48         18245  
1371 24         55 grep { not /^\s*#|^\s*$/o } # throw away comment lines and blank lines
1372             #map { s/\r+$//o; $_ } # strip trailing \r
1373             @L;
1374 24         112 close_input();
1375 24         434 return \@R;
1376             }
1377             }
1378            
1379             sub read_conf(@) {
1380 4     4 1 13 my($input, $options, $fcmd, $fcmdargs) = @_;
1381 4 50 33     43 $options ||= $input->get('options') if ref($input) eq __PACKAGE__;
1382 4 50       14 $options = complete_options($options) unless ref $options; # expand using predef'd set "default"
1383 4   66     22 $options->{delimiter} ||= $DefaultConfDelimiter; # ...where "delimiter" is undef
1384 4         16 return read_csv($input, $options, $fcmd, $fcmdargs);
1385             }
1386            
1387             sub read_string($);
1388             sub read_string($) {
1389 337     337 1 517 my $r = shift;
1390 337 100 66     3390 if (defined($r) and not defined reftype($r)) {
    50          
1391 2         9 return read_string(\$r);
1392             } elsif (reftype($r) ne 'SCALAR') {
1393 0         0 carp 'string or string-reference required';
1394 335         837 } Data::Rlist::read($r);
1395             }
1396            
1397             sub result(;$) {
1398 68     68 1 1840 my $self = shift;
1399 68 100       339 return $self->get(-result=>) if $self;
1400 1         4 return $R;
1401             }
1402            
1403             sub nanoscripts(;$) {
1404 53 50   53 1 1477 return unless defined wantarray;
1405 53         112 my $self = shift;
1406 53 100       213 my $ls = $self ? $self->get(-nanoscripts=>) : \@NStk;
1407 53 100       208 return wantarray ? @$ls : $ls;
1408             }
1409            
1410             sub evaluate_nanoscripts(;$)
1411             {
1412 19     19 1 1315 my($self) = @_;
1413 19         45 my @ns = nanoscripts($self);
1414 19         44 my $root = result($self); # this is $Data::Rlist::R or $self->{'-result'}
1415 19         35 my($this, $where);
1416            
1417 19         38 foreach my $ns (@ns) {
1418 37         76 $this = $ns->[0]; # list in which the nanoscript occurs
1419 37         56 $where = $ns->[1]; # key or index into the list
1420 37 100       160 if (ref($this) =~ 'ARRAY') {
1421 36         59 my $i = int($where);
1422 36         62 my $code = $this->[$i];
1423 36 50       89 print "$root: evaluating nanoscript $this\->[$i]:\n\t${\(escape7($code))}\n" if $DEBUG;
  0         0  
1424 36         2333 $this->[$i] = eval $code;
1425 36 50       191 print "\n\tresult: $this->[$i]\n" if $DEBUG;
1426             } else {
1427 1 50       8 die unless ref($this) =~ 'HASH';
1428 1         4 my $code = $this->{$where};
1429 1 50       5 print "$root: evaluating nanoscript $this\->{$where}:\n\t${\(escape7($code))}\n" if $DEBUG;
  0         0  
1430 1         66 $this->{$where} = eval $code;
1431 1 50       7 print "\n\tresult: $this->{$where}\n" if $DEBUG;
1432             }
1433             }
1434 19         91 return $#ns + 1;
1435             }
1436            
1437             sub warnings(;$) {
1438 1     1 1 6 my $self = shift;
1439 1 50       5 if ($self) {
1440 0         0 my $a = $self->get(-read_result=>);
1441 0 0       0 return $a->[0] if ref $a;
1442 0         0 return 0;
1443             } $Warnings
1444 1         6 }
1445            
1446             sub errors(;$) {
1447 16     16 1 6658 my $self = shift;
1448 16 100       62 if ($self) {
1449 15         73 my $a = $self->get(-read_result=>);
1450 15 50       100 return $a->[1] if ref $a;
1451 0         0 return 0;
1452             } $Errors
1453 1         3 }
1454            
1455             sub broken(;$) {
1456 0     0 1 0 my $self = shift;
1457 0 0       0 if ($self) {
1458 0         0 my $a = $self->get(-read_result=>);
1459 0 0       0 return $a->[2] if ref $a;
1460 0         0 return 0;
1461             } $Broken
1462 0         0 }
1463            
1464             sub missing_input(;$) {
1465 4     4 1 10 my $self = shift;
1466 4 50       10 if ($self) {
1467 0         0 my $a = $self->get(-read_result=>);
1468 0 0       0 return $a->[3] if ref $a;
1469 0         0 return 0;
1470             } $MissingInput
1471 4         13 }
1472            
1473             sub messages(;$) {
1474 1 50   1 1 7 return unless defined wantarray; # void context
1475 1         259 my $self = shift;
1476 1 50       147 if ($self) {
1477 0         0 my $a = $self->get(-read_result=>);
1478 0 0       0 return @{$a->[4]} if ref $a;
  0         0  
1479 1 50       24 } return wantarray ? @Messages : \@Messages
1480             }
1481            
1482             sub predefined_options($) {
1483 1250   50 1250 1 3314 my $name = shift || 'default';
1484 1250 50       3268 carp "\nunknown compile-options '$name'" unless exists $PredefinedOptions{$name};
1485 1250         2804 $PredefinedOptions{$name};
1486             }
1487            
1488             sub complete_options(;$$);
1489             sub complete_options(;$$)
1490             {
1491 924   100 924 1 629243 my($opts, $base) = (shift||'default', shift||'default');
      100        
1492 924         2395 my $using_default = ($base eq 'default');
1493 924 100       2770 $opts = predefined_options($opts) unless ref $opts;
1494 924 50       3051 $base = predefined_options($base) unless ref $base;
1495            
1496             # Make a new hash, copy all keys not already in $opts from $base.
1497 924         16409 $opts = { %$opts };
1498 924 50       3863 $opts->{_base} = ref($base) ? 'some hash' : $base;
1499 924         3352 while (my($k, $v) = each %$base) {
1500 13718 100       52169 $opts->{$k} = $v unless exists $opts->{$k}
1501             }
1502            
1503             # Finally complete $opts with "default" and return the new hash.
1504 924 100       2316 $opts = complete_options($opts) unless $using_default;
1505 924         2479 $opts
1506             }
1507            
1508             sub write($;$$$);
1509             sub write($;$$$)
1510             {
1511 651     651 1 18066 my($data, $output) = (shift, shift);
1512 651         1265 my($options, $header) = @_;
1513 651 50       1643 local $| = 1 if $DEBUG;
1514            
1515 651 100       1661 if (ref($data) eq __PACKAGE__) {
1516             $data->dock(sub {
1517 207   66 207   1008 $output ||= $data->get('-output');
1518 207   66     627 $options ||= $data->get('-options');
1519 207   33     750 $header ||= $data->get('-header');
1520 207         1507 Data::Rlist::write($data->get('-data'), $output, $options, $header) });
  207         505  
1521             } else {
1522             # $data is any Perl data or undef. Reset package globals, validate $options, then compile
1523             # $data.
1524            
1525 444   66     2138 my $to_string = ref $output || not defined $output;
1526 444         870 my($result, $optname, $fast, $perl);
1527 444 0 33     2134 $options ||= ($to_string ? 'string' : 'fast');
1528 444 100       1072 unless (ref $options) {
1529 86 100       265 $fast = 1 if $options eq 'fast';
1530 86 50       240 $perl = 1 if $options eq 'perl';
1531 86         182 $optname = "'$options'";
1532 86 100 66     379 $options = predefined_options($options) unless $fast || $perl;
1533             } else {
1534 358   100     599 $optname = "custom, based on '${\($options->{_base} || 'default')}'";
  358         2065  
1535             }
1536 444 100 66     2155 unless ($fast || $perl) {
1537 363 100       1125 $options->{auto_quote} = 1 unless defined $options->{auto_quote};
1538             }
1539            
1540 444 100       1373 unless ($to_string) {
1541             # Compile $data into a file named $output. Create a new file, exclusively lock it. It
1542             # is guaranteed that no other process will be able to run flock(FH,2) on the same file
1543             # while we hold the lock. (Because the OS suspends and blocks other processes.)
1544            
1545 108 50 33     680 confess $output if not defined $output or ref $output; # or not_valid_pathname($output)
1546 108         256 my($to_stdout, $fh) = $output eq '-';
1547 108 50       255 if ($to_stdout) {
1548 0 0       0 open($fh, ">$output") or confess("\nERROR: $!");
1549             } else {
1550 108 50 33     1078713 (open($fh, ">$output") and flock($fh, 2)) or
1551             confess("\nERROR: $output: can't create and lock Rlist-file: $!");
1552             }
1553            
1554             # Build file header. Compile $data to file $fh, return undef.
1555            
1556 10   50 10   20622 my $host = eval { use Sys::Hostname; hostname; } || 'some unknown machine';
  10         16916  
  10         32254  
  108         437  
1557 108   33     45454 my $uid = getlogin || getpwuid($<);
1558 108         4098 my $tm = localtime;
1559 108 100 100     189 my $prec; $prec = $options->{precision} if ref $options and defined $options->{precision};
  108         763  
1560 108 100 100     346 my $eol = $/; $eol = $options->{eol_space} if ref $options and defined $options->{eol_space};
  108         722  
1561 972 100       3199 my @header =
1562 108 50       1312 map { (length) ? "# $_\n" : "#\n" }
    100          
    50          
1563             (($to_stdout ? () :
1564             ("-*-rlist-generic-*-", "", $output, "",
1565             "Created $tm on <$host> by user <$uid>.",
1566             "Random Lists (Rlist) file (see Data::Rlist on CPAN and ).")),
1567             ((defined $prec) ?
1568             sprintf('Numerical precision: fixed-point, rounded to %d decimal places.', $prec) :
1569             sprintf('Numerical precision: floating-point.')),
1570             "Compile options: $optname.",
1571             ($header ? ("", @$header) : ("")));
1572 108         1865 print $fh @header, $eol;
1573            
1574 108 100 66     581 unless ($fast || $perl) {
1575 87 50       398 $result = 1 if compile($data, $options, $fh);
1576             } else {
1577             # Note that we return $Data::Rlist::R here.
1578 21         45 $result = 1;
1579 21 50       68 print $fh ${compile_fast($data)}.$eol if $fast;
  21         67  
1580 21 50       58 print $fh ${compile_Perl($data)}.$eol if $perl;
  0         0  
1581 108         18217 } close $fh;
1582             } else {
1583             # Compile $data into string and return a reference. Here $output has to be undef or a
1584             # string-ref (buffer).
1585 336 50 33     1173 confess $output unless not defined $output or ref $output eq 'SCALAR';
1586 336 100 66     1627 unless ($fast || $perl) {
1587 276         942 $result = compile($data, $options);
1588 276 50       777 $output = $result if ref $output;
1589             } else {
1590 60 50       258 $result = compile_fast($data) if $fast;
1591 60 50       152 $result = compile_Perl($data) if $perl;
1592 60 50       151 $$output = $$result if ref $output; # copy it -> $result is $Data::Rlist::R
1593             }
1594 444         1509 } return $result;
1595             }
1596             }
1597            
1598             sub write_csv($;$$$$);
1599             sub write_csv($;$$$$)
1600             {
1601 24     24 1 55 my($data, $output) = (shift, shift);
1602 24         37 my($options, $columns, $header) = @_;
1603 24 50       64 return 0 unless defined $data;
1604            
1605 24 100       72 if (ref($data) eq __PACKAGE__) {
1606             $data->dock(sub {
1607 12   33 12   75 $output ||= $data->get('-output');
1608 12   66     58 $options ||= $data->get('-options');
1609 12   33     57 $columns ||= $data->get('-columns');
1610 12   33     49 $header ||= $data->get('-header');
1611 12         127 Data::Rlist::write_csv($data->get('-data'), $output, $options, $columns, $header) });
  12         38  
1612             } else {
1613             # $data is anything. In case of undef returns 0. When the file could not be created,
1614             # dies. Otherwise returns 1.
1615             #
1616             # Unless a value looks like a number the value is quoted (strings may have commas).
1617             # read_csv uses split_quoted which keeps quotes and backslashes, then maybe_unquote7()s
1618             # each value.
1619            
1620 12         50 $options = complete_options($options, 'default');
1621 12   66     66 my $to_string = ref $output || not defined $output;
1622 12         32 my($separator, $prec, $auto_quote) = map { $options->{$_} } qw/separator precision auto_quote/;
  36         107  
1623 12 50 33     40 my $eol = $/; $eol = $options->{eol_space} if ref $options and defined $options->{eol_space}; $eol ||= "\n";
  12   50     89  
  12         28  
1624 12         30 my $result = '';
1625 12 50       29 $auto_quote = 0 unless defined $auto_quote;
1626 12 50       31 $result.= join($separator, @$columns).$eol if $columns;
1627 3876 100       7016 $result.= join($eol, map {
    50          
    100          
1628 12 50       39 join($separator, map { is_number($_)
  24         71  
1629             ? (defined($prec) ? round($_, $prec) : $_)
1630             : ($auto_quote ? maybe_quote7($_) : $_)
1631             } @$_) } @$data).$eol if @$data;
1632            
1633 12 100       51 if ($to_string) {
1634 6 50       29 if (ref $output) {
1635 6         25 $$output = $result; return $output
  6         62  
1636             } else {
1637 0         0 return \$result;
1638             }
1639             } else {
1640 6         19 my($to_stdout, $fh) = ($output eq '-');
1641 6 50       20 local $| = 1 if $DEBUG;
1642 6 50       20 if ($to_stdout) {
1643 0 0       0 open($fh, ">$output") or confess("\nERROR: $!");
1644             } else {
1645 6 50 33     2543 (open($fh, ">$output") and flock($fh, 2)) or
1646             confess("\nERROR: $output: can't create and lock CSV-file: $!");
1647             }
1648 6         148 print $fh $result;
1649 6         5478 close $fh; 1
  6         71  
1650             }
1651             }
1652             }
1653            
1654             sub write_conf($;$$$$)
1655             {
1656 2     2 1 5 my($data, $output, $options, $header) = @_;
1657 2 50 33     23 $options ||= $data->get('options') if ref($data) eq __PACKAGE__;
1658 2   66     19 my $have_sep = ref($options) && defined $options->{separator};
1659 2 50       9 $options = complete_options($options) unless ref $options;
1660 2 100       10 $options->{separator} = $DefaultConfSeparator unless $have_sep;
1661 2         11 return write_csv($data, $output, $options, $header);
1662             }
1663            
1664             sub write_string($;$) {
1665 336   50 336 1 4725 my($data, $options) = (shift, shift||'string');
1666 336         504 my $strref;
1667 336 100       1084 if (ref($data) eq __PACKAGE__) {
1668 105         356 my $out = $data->get('output');
1669 105         354 $data->set(-output => undef);
1670 105         386 $strref = Data::Rlist::write($data, undef, $options);
1671 105         819 $data->set(-output => $out);
1672             } else {
1673 231         673 $strref = Data::Rlist::write($data, undef, $options);
1674 336         936 } return $strref;
1675             }
1676            
1677             sub write_string_value($;$) {
1678 3   50 3 1 14 my($data, $options) = (shift, shift||'default');
1679 3 50       11 local $MaxDepth = $DefaultMaxDepth if $MaxDepth == 0;
1680 3         4 return ${Data::Rlist::write_string($data, $options)};
  3         17  
1681             }
1682            
1683             sub keelhaul($;$) {
1684 233     233 1 2112 my($data, $options) = (shift, shift);
1685 233 50 66     1809 carp 'Cannot keelhaul Perl data' if defined $options and $options eq 'perl'; # TODO: eval back
1686 233   66     763 $options ||= complete_options({ precision => undef }, 'squeezed');
1687 233         708 my $strref = Data::Rlist::write_string($data, $options);
1688 233 50       804 local $MaxDepth = $DefaultMaxDepth if $MaxDepth == 0;
1689 233         741 my $deep_copy = read_string($strref);
1690 233 100       7291 return wantarray ? ($deep_copy, $strref) : $deep_copy;
1691             }
1692            
1693             =head2 Implementation Functions
1694            
1695             =over
1696            
1697             =item F
1698            
1699             =item F
1700            
1701             Open/close Rlist text file or string INPUT for parsing. Used internally by F> and
1702             F>.
1703            
1704             B
1705            
1706             The function can preprocess the INPUT file using FILTER. Use the special value 1 to select the
1707             default C preprocessor (F). FILTER-ARGS is an optional string of additional
1708             command-line arguments to be appended to FILTER. For example,
1709            
1710             my $foo = Data::Rlist::read("foo", 1, "-DEXTRA")
1711            
1712             eventually does not parse F, but the output of the command
1713            
1714             gcc -E -Wp,-C -DEXTRA foo
1715            
1716             Hence within F now C-preprocessor-statements are allowed. For example,
1717            
1718             {
1719             #ifdef EXTRA
1720             #include "extra.rlist"
1721             #endif
1722            
1723             123 = (1, 2, 3);
1724             foobar = {
1725             .
1726             .
1727            
1728             B
1729            
1730             This mode uses F and a temporary file. It is enabled by setting F<$Data::Rlist::SafeCppMode>
1731             to 1 (the default is 0). It protects single-line F<#>-comments when FILTER begins with either
1732             F, F or F. F> then additionally runs F to convert all input
1733             lines beginning with whitespace plus the F<#> character. Only the following F-commands are
1734             excluded, and only when they appear in column 1:
1735            
1736             - F<#include> and F<#pragma>
1737            
1738             - F<#define> and F<#undef>
1739            
1740             - F<#if>, F<#ifdef>, F<#else> and F<#endif>.
1741            
1742             For all other lines F converts F<#> into F<##>. This prevents the C preprocessor from
1743             evaluating them. Because of Perl's limited F function, which isn't able to dissolve long
1744             pipes, the invocation of F requires a temporary file. The temporary file is created in the
1745             same directory as the input file. When you only use F and F comments, however, this
1746             read mode is not required.
1747            
1748             =cut
1749            
1750             sub open_input($;$$)
1751             {
1752 517     517 1 1448 my($input, $fcmd, $fcmdargs) = @_;
1753 517         843 my($rls, $filename);
1754 517         1589 my $rtp = reftype $input;
1755            
1756 517 50 66     2455 carp "\n${\((caller(0))[3])}: filename or scalar-ref required as INPUT" if defined $rtp && $rtp ne 'SCALAR';
  0         0  
1757 517 50       1450 carp "\n${\((caller(0))[3])}: package locked" if $Readstruct;
  0         0  
1758 517         1013 $Readstruct = $ReadFh = undef;
1759 517 50       1468 local $| = 1 if $DEBUG;
1760            
1761 517 50       1272 if (defined $input) {
1762 517         1623 $Readstruct = { };
1763 517 100       1385 unless (ref $input) {
1764 138         485 $Readstruct->{filename} = $input;
1765 138 50       298 unless ($fcmd) { # the file is read unfiltered
1766 138 100 66     8592 unless (open($Readstruct->{fh}, "<$input") && flock($Readstruct->{fh}, 1)) {
1767 1         2 $Readstruct = undef;
1768 1         9 pr1nt('ERROR', "input file '$input'", $!);
1769             }
1770             } else { # pipe it through $fcmt
1771 0 0       0 $fcmd = "gcc -E -Wp,-C -x c++" if $fcmd == 1;
1772 0 0       0 $fcmd = "$fcmd $fcmdargs" if $fcmdargs;
1773            
1774 0 0       0 if ($SafeCppMode) {
1775 0 0       0 if ($fcmd =~ /^(gcc|g\+\+|cpp)/i) {
1776             # Filter input with sed:
1777             #
1778             # (1) Because known #-commands must start at column 1 we first escape all
1779             # indented '#'s into '##'s:
1780             # "(^ +)#" -> '$1\#'
1781             # (2) Next we prefix the known commands with a blank, e.g.
1782             # "#if 0" -> " #if 0"
1783             # (3) Finally we escape all unknown #-commands at column 1:
1784             # "^#" -> "\#"
1785             #
1786             # lexln will then reverse the escaping. Since the builtin open does not
1787             # support true pipes, a temporary file receives the output of sed, which is
1788             # then preprocessed. The temporary file will be removed in close_input.
1789            
1790 0         0 my($sedfh, $tmpfh);
1791 0 0       0 open($sedfh,
1792             "sed '".
1793             join('; ', ("s/^\\([ \t][ \t]*\\)#/\\1\\\\#/", # many seds don't know \t -> insert literally
1794             "s/^#\\(include\\|pragma\\|if\\|ifdef\\|else\\|endif\\|define\\|undef\\)/ #\\1/",
1795             "s/^#/\\\\#/")).";' <$input 2>nul |") ||
1796             die "\nERROR: input file '$fcmd': $!";
1797 0         0 my($tmpinput, $i) = (undef, 0);
1798 0         0 do { $tmpinput = $input.'.tmp'.$i++ } while -e $tmpinput;
  0         0  
1799 0         0 $Readstruct->{tmpfile} = $input = $tmpinput;
1800 0 0       0 open ($tmpfh, ">$input") || die "\nERROR: temporary file '$input': $!";
1801 0         0 print $tmpfh readline($sedfh);
1802 0         0 close $tmpfh;
1803 0         0 close $sedfh;
1804             }
1805             }
1806            
1807             # Open the file $input (or the temporary sed'd file) for preprocessing.
1808            
1809 0 0       0 unless (open($Readstruct->{fh}, "$fcmd $input 2>nul |")) {
1810 0         0 $Readstruct = undef;
1811 0         0 pr1nt('ERROR', "preprocessed input '$fcmd $input': $!");
1812             }
1813             }
1814            
1815 138 100       588 if (defined $Readstruct) {
1816 137         305 $ReadFh = $Readstruct->{fh};
1817 137         265 $LnArray = undef;
1818 137         255 $Ln = '';
1819             }
1820             } else {
1821             # Input is a string-ref. It will be split into lines at LF or CR+LF. But when it has
1822             # no newlines it is read as one big line.
1823            
1824 379 50       815 carp "cannot preprocess strings" if $fcmd;
1825 379         9740 $LnArray = [ split /\r*\n/, $$input ];
1826 379         1300 $Ln = '';
1827             }
1828             } $Readstruct
1829 517         1835 }
1830            
1831             sub close_input()
1832             {
1833 516 100   516 1 4920 close($Readstruct->{fh}) if $Readstruct->{fh};
1834 516 50       1486 if ($Readstruct->{tmpfile}) {
1835 0 0       0 unlink($Readstruct->{tmpfile}) ||
1836             croak "\nERROR: could not temporary file '$Readstruct->{tmpfile}': $!";
1837             }
1838 516         1269 $LnArray = $Ln = $Readstruct = undef;
1839             }
1840            
1841             =item F
1842            
1843             Lexical scanner. Called by F> to split the current line into tokens. F reads F<#>
1844             or F single-line-comment and F multi-line-comment as regular white-spaces. Otherwise it
1845             returns tokens according to the following table:
1846            
1847             RESULT MEANING
1848             ------ -------
1849             '{' '}' Punctuation
1850             '(' ')' Punctuation
1851             ',' Operator
1852             ';' Punctuation
1853             '=' Operator
1854             'v' Constant value as number, string, list or hash
1855             '??' Error
1856             undef EOF
1857            
1858             F appends all here-doc-lines with a newline character. For example,
1859            
1860             <
1861             a
1862             b
1863             test1
1864            
1865             is effectively read as C<"a\nb\n">, which is the same value as the equivalent here-doc in Perl has.
1866             So, not all strings can be encoded as a here-doc. For example, it might not be quite obvious to
1867             many programmers that C<"foo\nbar"> cannot be expressed as here-doc.
1868            
1869             =item F
1870            
1871             Read the next line of text from the current input. Return 0 if F>, otherwise return 1.
1872            
1873             =item F
1874            
1875             Return true if current input file/string is exhausted, false otherwise.
1876            
1877             =item F
1878            
1879             Read Rlist language productions from current input. This is a fast, non-recursive parser driven by
1880             the parser map F<%Data::Rlist::Rules>, and fed by F>. It is called internally by
1881             F>. F returns an array- or hash-reference, or F in case of parsing
1882             F>.
1883            
1884             =cut
1885            
1886             # Local variables for lex(). Note that since lexical variables are init'd at compile-time, they're
1887             # available in BEGIN blocks.
1888            
1889             my $RELexNumber = qr/^($REFloatHere)/; # number constant
1890             my $RELexSymbol = qr/^($RESymbolHere)/; # symbolic name without quotes
1891             my $RELexQuotedString = qr/^\"((?:\\[nrbftv\"\'\\]|\\[0-7]{3}|[^\"])*)\"/; # quoted string constant
1892             my $RELexQuotedSymbol = qr/^"($RESymbolHere)"/; # symbolic name in quotes
1893             my $RELexPunctuation = qr/^[$REPunctuationCharacter]/;
1894             my $C1;
1895            
1896             BEGIN {
1897 10     10   882 $REIsPunct[$_] = 0 foreach 0..255;
1898 10         30 $REIsPunct[ 61] = 1; # =
1899 10         24 $REIsPunct[ 44] = 1; # ,
1900 10         21 $REIsPunct[ 59] = 1; # ;
1901 10         18 $REIsPunct[123] = 1; # {
1902 10         19 $REIsPunct[125] = 1; # }
1903 10         17 $REIsPunct[ 40] = 1; # (
1904 10         19 $REIsPunct[ 41] = 1; # )
1905            
1906 10         950 $REIsDigit[$_] = 0 foreach 0..255;
1907 10         70 $REIsDigit[$_] = 1 foreach 48.. 57;
1908 10         2647688 $REIsDigit[43] = $REIsDigit[45] = $REIsDigit[46] = 1;
1909             }
1910            
1911             sub lex()
1912             {
1913             # First reduce leading whitespace and empty lines. Set $C1 to the ASCII code of the first
1914             # character in the current line $Ln.
1915             #
1916             # The Perl \s regex matches [ \t\n\r\f], but
1917             # ($C1 <= 32 && ($C1 == 32 || $C1 == 9 || $C1 == 10 || $C1 == 13 || $C1 == 12))
1918             # is still more efficient. However, to make it even faster we use
1919             # ($C1 <= 32)
1920            
1921 54120 50   54120 1 129164 unless (defined $Ln) {
1922 0 0       0 return undef unless lexln(); # fetch next $Ln or stop
1923             }
1924             NEXTC1:
1925 73949 100       153401 unless ($C1 = ord($Ln)) { # ord returns 0 on empty strings
1926 13446 100       21358 return undef unless lexln();
1927 12954         37681 goto NEXTC1;
1928             }
1929 60503 100       133240 if ($C1 <= 32) {
1930 27822         96341 $Ln =~ s/^\s+//o;
1931 27822 100       81874 goto NEXTC1 unless $C1 = ord($Ln);
1932             }
1933            
1934             # Puncutators = , ; { } ( )
1935            
1936             #if ($Ln =~ $RELexPunctuation) {
1937             #if ($C1 == 61 || $C1 == 44 || $C1 == 59 || $C1 == 123 || $C1 == 125 || $C1 == 40 || $C1 == 41) {
1938 54751 100       124869 if ($REIsPunct[$C1]) {
1939 30501         58543 $Ln = substr($Ln, 1);
1940 30501         118618 return chr($C1);
1941             }
1942            
1943             # Number scalars. C language single/double-precision numbers. Test if $C1 is a digit, '.', '-'
1944             # or '+'.
1945            
1946             #if (($C1 >= 48 && $C1 <= 57) || $C1 == 43 || $C1 == 45 || $C1 == 46) {
1947 24250 100       57473 if ($REIsDigit[$C1]) {
1948 13345 100 33     89391 if ($Ln =~ s/$RELexNumber//o) {
    50 33        
1949 13328         29570 push @VStk, $1;
1950 13328         43502 return 'v';
1951             } elsif (($C1 == 45 || $C1 == 46) && $Ln =~ s/$RELexSymbol//o) {
1952             # Symbolic name (unquoted string) beginning with '-' or '.'.
1953 17         1525 push @VStk, $1;
1954 17         63 return 'v';
1955             } else {
1956 0         0 return syntax_error(qq'unrecognized number "$Ln"');
1957             }
1958             }
1959            
1960             # String scalars, un/quoted, here-docs.
1961            
1962 10905 100       27967 if ($C1 == 34) { # "
    100          
1963             # String scalar, quoted. Removes the quotes and unesacpes the strings (compile adds
1964             # quotes).
1965            
1966             #if (0) {
1967             # BUG: the regex engine of perl 5.8.7 (Cygwin) unconditionally exits when it tried to
1968             # match a large quoted string, e.g. >8000 characters. perldb provides no hint
1969             # why. This problem once occurred during intensive testing of this package.
1970            
1971             #if (length($Ln) > 1000) {
1972             #print STDERR "string len=".length($Ln)." val = \n\n$Ln\n\n" if $DEBUG;
1973            
1974             # TODO: take a precautionary approach because of bug/misbehaviors in Perl's regex
1975             # engine now (see above).
1976             #}
1977             #}
1978            
1979             # if ($Ln =~ s/$RELexQuotedSymbol//o) { # no escape sequences
1980             # push @VStk, $1;
1981             # return 'v';
1982             # }
1983            
1984 4237 50       63354 if ($Ln =~ s/$RELexQuotedString//o) { # maybe has escape sequences
1985 4237         9428 push @VStk, unescape7($1);
1986 4237         14799 return 'v';
1987             } else {
1988             # There was no closing '"' found on this line. To recover from this error (which is
1989             # hard) we simply continue to fetch lines until EOF, or $RELexQuotedString happens to
1990             # match. Then we return '??' instead of 'v'.
1991            
1992 0         0 my $Lnprev;
1993 0         0 syntax_error("unterminated quoted string '$Ln'");
1994 0         0 while (1) {
1995 0         0 $Lnprev = $Ln;
1996 0 0       0 unless (lexln()) {
1997 0         0 syntax_error("EOF in quoted string"); last;
  0         0  
1998             }
1999 0         0 $Ln = $Lnprev.$Ln;
2000 0 0       0 last if $Ln =~ s/$RELexQuotedString//o;
2001 0         0 } return '??';
2002             }
2003             } elsif ($C1 == 60) { # <
2004 218 50       1440 if ($Ln =~ s/<<([_\w]+)//io) {
2005             # Fetch lines until $tok appears at top of a line. Then continues at $rest of original
2006             # line. If not EOF the next call to lexln() will return the next line after the line
2007             # that had closed the here-doc.
2008            
2009 218         714 my($tok, $rest, @ln, $ok) = ($1, $Ln);
2010 218         359 my $nanoscript = ($tok eq $DefaultNanoscriptToken);
2011 218         455 while ($ok = lexln()) {
2012 2211 100       8936 if ($Ln =~ /^$tok\s*$/m) {
2013 218         331 $Ln = $rest; last;
  218         318  
2014             } else {
2015 1993         3259 push @ln, unescape7($Ln)
2016             }
2017             }
2018 218 50       379 unless ($ok) {
2019 0 0       0 confess unless at_eof();
2020 0         0 return syntax_error(qq(EOF while reading here-document '$tok'));
2021             } else {
2022 218         958 push @VStk, join("\n", @ln)."\n"; # add newline to all lines
2023 218 100       1104 return $nanoscript ? 'n' : 'v';
2024             }
2025             }
2026             }
2027            
2028             # Jump over comments. '//' or '#' single-line-comment, '/*' multi-line-comment.
2029            
2030 6450 100       16614 if ($C1 == 35) { # '#'
    50          
2031 1123         1639 $Ln = ''; goto NEXTC1;
  1123         6167  
2032             } elsif ($C1 == 47) { # '/'
2033 0 0       0 if ($Ln =~ /^\/[\*\/]/o) {
2034 0 0       0 goto NEXTC1 if $Ln =~ s/^\/\*.*\*\/\s*//x;
2035 0 0       0 if ($Ln =~ /^\/\//o) {
2036 0         0 $Ln = ''; goto NEXTC1;
  0         0  
2037             }
2038 0         0 while (lexln()) {
2039 0 0       0 if ($Ln =~ /\*\/(.*)/) {
2040 0         0 $Ln = $1; goto NEXTC1;
  0         0  
2041             }
2042 0         0 } return syntax_error(qq(unterminated comment));
2043             }
2044             }
2045            
2046             # Must be a symbolic name (unquoted string). Names are printable and hence have no \NNN
2047             # sequences. (Finally applies a regex.)
2048            
2049 5327 50       25301 if ($Ln =~ s/$RELexSymbol//o) {
2050 5327         15814 push @VStk, $1;
2051 5327         16400 return 'v';
2052             }
2053            
2054             # Unrecognized character, e.g. '*', single '<', '\''.
2055            
2056 0         0 die "\n".syntax_error(qq(unrecognized character-code $C1).' '.chr($C1));
2057             }
2058            
2059             sub at_eof() {
2060 0 0 0 0 1 0 if ($ReadFh) {
    0          
2061 0         0 return eof($ReadFh);
2062             } elsif (defined $LnArray && $#$LnArray != -1) {
2063 0         0 return 0
2064             } else {
2065 0         0 return 1 # $LnArray undef'd or empty
2066             }
2067             }
2068            
2069             sub lexln() {
2070             # Called from lex to parse Rlist files, and from read_csv.
2071            
2072 15729 100 100 15729 1 84765 if ($ReadFh && !eof($ReadFh)) { # eof(undef) and eof(0) are 1
    100 100        
2073 6603         14288 $Ln = readline($ReadFh); chomp $Ln; # strips $/
  6603         13918  
2074 6603 50       12502 $Ln =~ s/^([ \t]*)\\#/$1#/o if $SafeCppMode;
2075             #print "$Ln\n";
2076 6603         17945 return 1;
2077             } elsif (defined $LnArray && $#$LnArray != -1) {
2078             # Read from string.
2079 8610         14305 $Ln = shift @$LnArray;
2080 8610         24335 return 1;
2081             }
2082 516         736 $Ln = undef;
2083 516         2594 return 0;
2084             }
2085            
2086             sub parse()
2087             {
2088 492     492 1 1167 my($q, $t, $m, $r, $l) = ('');
2089 492         1309 $Warnings = $Errors = $MissingInput = $Broken = 0;
2090 492         1506 @Messages = @VStk = @NStk = ();
2091            
2092 492         1436 while (defined($t = lex())) {
2093             # Push new token, then reduce as many rules as possible from the tail of the queue before
2094             # fetching more tokens. Longer rules are matched first. The constants 2 and 4 are the
2095             # min./max. lengths of rules in %Rules. When $l (the current length of $m) is <2 no rule
2096             # can be matched.
2097            
2098 53628         74175 if (1) {
2099 53628         64813 $q .= $t;
2100 53628         113888 while (($l = length($q)) >= 2) {
2101 80529 100       341363 if ($r = $Rules{substr($q, -4)}) {
    100          
    100          
2102 5758         14091 substr($q, -4) = $r->();
2103             } elsif ($r = $Rules{substr($q, -3)}) {
2104 16062         28807 substr($q, -3) = $r->();
2105             } elsif ($r = $Rules{substr($q, -2)}) {
2106 6050         11311 substr($q, -2) = $r->();
2107 52659         111632 } else { last } # fetch another token
2108             } # match another rule
2109             } else {
2110             # The above loop is ca. 10% faster than the second, so this one is disabled (although
2111             # working). We expect the if(1/0) blocks to be neutralized by the byte-compiler.
2112            
2113             $l = length($q .= $t);
2114             while ($l >= 2) {
2115             $l = 4 if $l > 4;
2116             $m = substr($q, -$l);
2117            
2118             while (1) { # TODO: last if $m begins with [=,;})]
2119             if ($Rules{$m}) { # can reduce a rule $m
2120             printf STDERR "%20s\treducing $m\n", $q if $DEBUG;
2121             substr($q, -$l) = $Rules{$m}->();
2122             $l = length $q; last;
2123             } else {
2124             # $m is not a matching rule. Cut the first character from $m and try
2125             # matching it.
2126             #
2127             # Note that to uickly remove the first character from a string is
2128             # surprisingly hard in Perl. All of the following work:
2129             #
2130             # $m = unpack('x1A'.$l, $m)
2131             # $m = substr($m, 1) # fastest
2132             # substr($m, 0, 1) = ''
2133            
2134             printf STDERR "%20s\tno rule $m\n", $q if $DEBUG && $l > 1;
2135             last if --$l < 2;
2136             $m = substr($m, 1);
2137             }
2138             } last if $Errors;
2139             }
2140             }
2141             }
2142            
2143             # Parser has finished, EOF has been reached (lex had returned undef). The token queue has now
2144             # been reduced to one token and @VStk only contains its value. The token 'h' (hash) or 'l'
2145             # (list). Because of the parser map nature it could also be 'v' (value), in which case it shall
2146             # decay into a hash or list.
2147            
2148 492 50       1201 return undef if $Errors;
2149            
2150 492 50       1294 print STDERR qq'Data::Rlist::parse() reached EOF with "$q"\n' if $DEBUG;
2151 492 100       1258 if (@VStk == 0) {
2152 4 50 33     8 croak STDERR "unexpected, supernumeray tokens after parsing:\n\t$q\n" if $DEBUG && $q;
2153 4         5 $MissingInput = 1; # empty input or non-existing file
2154 4         7 return undef;
2155             } else {
2156 488 50       2845 if (@VStk > 1) {
    50          
    100          
2157 0         0 pr1nt('ERROR', qq'broken input', qq'expected "l" (list) or "h" (hash), not "$q"');
2158 0 0       0 my @overproduced = map { ref($_) ? $_ : Data::Rlist::quote7($_) } @VStk;
  0         0  
2159 0         0 for (my $i = 0; $i <= $#overproduced; ++$i) {
2160 0         0 warning(sprintf("cancelling overbilled value [%u] %s", $i, $overproduced[$i]));
2161             }
2162 0 0       0 print STDERR qq'Data::Rlist::parse() returns undef\n' if $DEBUG;
2163 0         0 return undef;
2164             } elsif (not defined $VStk[0]) {
2165 0         0 confess # dto.
2166             } elsif ($q eq 'v') {
2167 487         1994 my $rtp = reftype $VStk[0]; # result type
2168 487 100       4693 unless (defined $rtp) {
    50          
2169 9         33 $VStk[0] = { $VStk[0] => undef } # not a reference -> the input is just one scalar
2170             } elsif ($rtp !~ /(?:HASH|ARRAY)/) {
2171 0         0 confess quote7($VStk[0]) # shall be an array/hash-reference
2172             }
2173             }
2174             }
2175            
2176 488 50       1408 print STDERR "Data::Rlist::parse() returns $VStk[0]\n" if $DEBUG;
2177 488         1516 return pop @VStk;
2178             }
2179            
2180             =item F
2181            
2182             Build Rlist text from DATA:
2183            
2184             =over
2185            
2186             =item *
2187            
2188             Reference-types F, F, F and F are compiled into text, whether blessed or
2189             not.
2190            
2191             =item *
2192            
2193             Reference-types F are compiled depending on the L|/Compile Options> setting in
2194             OPTIONS.
2195            
2196             =item *
2197            
2198             Reference-types F (L), F and F (file-
2199             and directory handles) cannot be dissolved, and are compiled into the strings C<"?GLOB?">,
2200             C<"?IO?"> and C<"?FORMAT?">.
2201            
2202             =item *
2203            
2204             F'd values in arrays are compiled into the default Rlist C<"">.
2205            
2206             =back
2207            
2208             When FH is defined compile directly to this file and return 1. Otherwise build a string and return
2209             a reference to it. This is the compilation function called when the OPTIONS argument passed to
2210             F> is not omitted, and is not C<"fast"> or C<"perl">.
2211            
2212             =item F
2213            
2214             Build Rlist text from DATA, as fast as actually possible with pure Perl:
2215            
2216             =over
2217            
2218             =item *
2219            
2220             Reference-types F, F, F and F are compiled into text, whether blessed or
2221             not.
2222            
2223             =item *
2224            
2225             F, F, F and F are compiled into the strings C<"?CODE?">, C<"?IO?">,
2226             C<"?GLOB?"> and C<"?FORMAT?">.
2227            
2228             =item *
2229            
2230             F'd values in arrays are compiled into the default Rlist C<"">.
2231            
2232             =back
2233            
2234             F> is the default compilation function. It is called when you pass F or
2235             C<"fast"> in place of the OPTIONS parameter (see F>, F>). Since
2236             F> considers no compile options it will not call code, round numbers, detect
2237             self-referential data etc. Also F> always compiles into a unique package variable
2238             to which it returns a reference.
2239            
2240             =item F
2241            
2242             Like F>, but do not compile Rlist text - compile DATA into Perl syntax. It can
2243             then be F'd. This renders more compact, and more exact output as L. For
2244             example, only strings are quoted. To enable this compilation function pass C<"perl"> to as the
2245             OPTIONS argument, or set the F<-options> attribute of package objects to this string.
2246            
2247             =back
2248            
2249             =cut
2250            
2251             our($Datatype, $K, $V);
2252             our($Outline_data, $Outline_hashes, $Code_refs, $Here_docs, $Auto_quote, $Precision);
2253             our($Eol_space, $Paren_space, $Bol_tabs, $Comma_punct, $Semicolon_punct, $Assign_punct);
2254            
2255             sub compile($;$$)
2256             {
2257 363     363 1 787 my($data, $result) = shift;
2258 363         869 my $options = complete_options(shift);
2259            
2260 363         1644 local($Fh, $Depth, $Broken) = (shift, -1, 0);
2261 363 100       1522 local $RoundScientific = 1 if $options->{scientific};
2262 2178         6430 local($Eol_space, $Paren_space, $Bol_tabs,
2263 363         1039 $Comma_punct, $Semicolon_punct, $Assign_punct) = map { $options->{$_} }
2264             qw/eol_space paren_space bol_tabs
2265             comma_punct semicolon_punct assign_punct/;
2266            
2267 2178         5420 local($Outline_data, $Outline_hashes,
2268 363         963 $Code_refs, $Here_docs, $Auto_quote, $Precision) = map { $options->{$_} }
2269             qw/outline_data outline_hashes
2270             code_refs here_docs auto_quote precision/;
2271            
2272 363 50       1188 $Eol_space = $/ unless defined $Eol_space;
2273            
2274 363 100       1441 return compile1($data) unless $Fh; # return string-reference
2275 87         453 return compile2($data); # return 1
2276             }
2277            
2278             sub comptab($) {
2279 2320 100   2320 0 5635 return '' if $Bol_tabs == 0; # no indentation
2280 1644         5384 return chr(9) x ($Bol_tabs * ($Depth + $_[0])); # use physical TABs
2281             }
2282            
2283             sub compval($) {
2284             # Compile a scalar value (number or string, but not a reference).
2285             #
2286             # TODO: to gain more speed, in compile create a specialized sub depending on globals
2287             # $Precision, $Here_docs.
2288            
2289 13254     13254 0 17792 my $v = shift;
2290 13254 50       30540 if (defined $v) {
2291 13254 100       152681 if ($v !~ $REValue) {
    50          
2292             # Not an identifier, number or quoted string. Hence $v will be quoted, and maybe as
2293             # here-doc.
2294 1690 100       4153 if ($Here_docs) {
2295 793 100       2119 if ($v =~ /\n.*\n\z/os) {
2296             # Here-docs enabled and $v qualifies. We can write only strings with at least
2297             # two LFs as here-docs (although a final LF would be sufficient). Now find a
2298             # token that doesn't interfere with the text: "___", "HERE", "HERE0", "HERE1"
2299             # etc.
2300            
2301 97         921 my @ln = split /\n/, $v;
2302 97         180 my $tok = '___';
2303 97         105 while (1) {
2304 97 50       151 last unless grep { /^$tok/ } @ln;
  1728         3685  
2305 0 0       0 if ($tok =~ /\d\z/) {
2306 0         0 $tok++
2307             } else {
2308 0 0       0 $tok = $tok !~ 'HERE' ? 'HERE' : 'HERE0'
2309             }
2310 97         203 } $v = join('', map { "$_\n" } ("<<$tok", (map { escape7($_) } @ln), $tok));
  1922         3168  
  1728         2341  
2311             } else {
2312 696         1322 $v = quote7($v)
2313             }
2314             } else {
2315 897         1981 $v = quote7($v)
2316             }
2317             } elsif (ord($v) != 34) {
2318             # Not already quoted. Either $v is a number or a symbolic name.
2319 11564 100       32157 if ($Auto_quote) {
    100          
2320 8684 100       68978 if ($v =~ $REFloat) {
2321 7244 100       23632 $v = round($v, $Precision) if defined $Precision;
2322             } else {
2323 1440 50       9565 die $v unless $v =~ $RESymbol;
2324 1440         4481 $v = qq("$v");
2325             }
2326             } elsif (defined $Precision) {
2327 1728 100       17562 $v = round($v, $Precision) if $v =~ $REFloat;
2328             }
2329             }
2330             } $v
2331 13254         33360 }
2332            
2333             sub compile1($);
2334             sub compile1($)
2335             {
2336             # Compile Perl data structure $data into some Rlist and return a string reference.
2337            
2338 13528     13528 0 22940 my $data = shift;
2339 13528         14287 my($r, $inl, $k, $v);
2340            
2341 13528 100       31404 if (ref $data) {
    50          
2342 2007         5713 $Datatype = ord reftype $data;
2343 2007         2378 $Depth++;
2344 2007 50 66     8547 if ($MaxDepth >= 1 && $MaxDepth < $Depth) {
    100          
    100          
    100          
    100          
    50          
2345 0 0       0 pr1nt('ERROR', "compile1() broken in deep $data (max-depth = $MaxDepth)") unless $Broken++;
2346 0         0 $r = DEFAULT_VALUE
2347             } elsif ($Datatype == 65) { # 65 => 'A' => 'ARRAY'
2348 1262         1748 my $cnt = @$data;
2349 1262 50 100     6119 unless ($cnt) {
    100          
2350 0         0 $r = '('.$Paren_space.')';
2351             } elsif ($Outline_data > 0 && $Outline_data <= $cnt) {
2352             # List has more than $Outline_data number of configured elements; print each
2353             # element on a separate line.
2354            
2355 376         782 my($pref0, $pref) = (comptab(0), comptab(1));
2356 376         979 $r.= $Eol_space.$pref0.'('.$Eol_space.$pref;
2357            
2358             # BUG: for some strange reason it destroys $data if assigning the result of the
2359             # recursive compile1() call to $v again. Perl 5.8.6,
2360             # cygwin-thread-multi-64int. Solution: assign temporarily to $w.
2361            
2362 376         604 my $w;
2363 376         633 foreach $v (@$data) {
2364 4084         4112 $w = ${compile1($v)};
  4084         7446  
2365 4084 100       11963 $r.= $Comma_punct.$Eol_space.$pref if $inl; $inl = 1;
  4084         5123  
2366 4084         7418 $r.= $w;
2367             }
2368 376         897 $r.= $Eol_space.$pref0.')';
2369             } else {
2370             # Print all entries to one line.
2371            
2372 886         1052 my $w;
2373 886         1833 $r.= '('.$Paren_space;
2374 886         1411 foreach $v (@$data) {
2375 6498         8157 $w = ${compile1($v)};
  6498         18795  
2376 6498 100       17304 $r.= $Comma_punct if $inl; $inl = 1;
  6498         9373  
2377 6498         23617 $r.= $w;
2378             }
2379 886 50       2026 $r.= $Paren_space if $inl;
2380 886         2735 $r.= ')';
2381             }
2382             } elsif ($Datatype == 72) { # 72 => 'H' => 'HASH'
2383 496         3731 my @keys = sort keys %$data;
2384 496 50       1388 unless (@keys) {
2385 0         0 $r = '{'.$Paren_space.'}';
2386             } else {
2387 496   66     2755 my $manykeys = $Outline_data && @keys;
2388 496         1577 my($pref0, $pref) = (comptab(0), comptab(1));
2389 496         918 foreach $k (@keys) {
2390 2904         6428 $v = $data->{$k};
2391 2904 100       5976 unless ($inl) { # prepare first pair
2392 496 100 66     1782 $r.= $Eol_space.$pref0 if $Outline_hashes && $manykeys;
2393 496         999 $r.= '{'.$Paren_space;
2394 496 100       1232 $r.= $Eol_space if $manykeys; $inl = 1;
  496         756  
2395             }
2396 2904 100       21937 $k = $pref.(($k !~ $REValue) ? quote7($k) : $k);
2397 2904 100       6048 unless (defined($v)) {
2398 240         747 $r.= $k.$Semicolon_punct.$Eol_space; # value is undef
2399             } else {
2400 2664         2746 $v = ${compile1($v)};
  2664         4875  
2401 2664         9299 $r.= $k.$Assign_punct.$v.$Semicolon_punct.$Eol_space;
2402             }
2403             }
2404 496 100       1256 $r.= $pref0 if $manykeys;
2405 496         902 $r.= '}';
2406 496 100       1678 $r.= $Eol_space unless $Depth;
2407             }
2408             } elsif ($Datatype == 82) { # 82 => 'R' => 'REF'
2409 2         6 $r.= ${compile1($$data)}
  2         9  
2410             } elsif ($Datatype == 83) { # 83 => 'S' => 'SCALAR'
2411 2         6 $r.= compval($$data);
2412             } elsif ($Datatype == 67) { # 67 => 'C' => 'CODE'
2413 245 100       1044 $r.= $Code_refs ? ${compile1($data->())} : '"?CODE?"'
  4         10  
2414             } else { # other reference: 'IO', 'GLOB' or 'FORMAT'
2415 0         0 $r.= compval('?'.reftype($data).'?')
2416             }
2417 2007         2367 $Depth--;
2418             } elsif (defined $data) { # $data is some scalar (not a ref)
2419 11521         21511 $r = compval($data);
2420             } else { # $data is undefined
2421 0         0 $r = DEFAULT_VALUE
2422 13528         35785 } \$r;
2423             }
2424            
2425             sub compile2($);
2426             sub compile2($)
2427             {
2428             # Compile Perl data structure $data into some Rlist and directly print into file handle $Fh (do
2429             # not compile a big string such as compile1() does).
2430             #
2431             # WARNING: this must be merely a copy of the compile1() code.
2432            
2433 2382     2382 0 3551 my $data = shift;
2434 2382         2583 my($inl, $k, $v);
2435            
2436 2382 100       5036 if (ref $data) {
    50          
2437 651         1571 $Datatype = ord reftype $data;
2438 651         798 $Depth++;
2439 651 50 66     3476 if ($MaxDepth >= 1 && $MaxDepth < $Depth) {
    100          
    100          
    50          
    50          
    50          
2440 0 0       0 pr1nt('ERROR', "compile2() broken in deep $data (depth = $Depth, max-depth = $MaxDepth)") unless $Broken++;
2441 0         0 print $Fh "\n", DEFAULT_VALUE;
2442             } elsif ($Datatype == 65) { # 65 => 'A' => 'ARRAY'
2443 404         807 my $cnt = 1 + $#$data;
2444 404 50 100     1788 unless ($cnt) {
    100          
2445 0         0 print $Fh '('.$Paren_space.')';
2446             } elsif ($Outline_data > 0 && $Outline_data <= $cnt) {
2447             # List has more than the number of configured elements; print each element on a
2448             # separate line.
2449            
2450 121         236 my($pref0, $pref) = (comptab(0), comptab(1));
2451 121         346 print $Fh $Eol_space.$pref0.'('.$Eol_space.$pref;
2452 121         222 foreach $v (@$data) {
2453 502 100       1381 print $Fh $Comma_punct.$Eol_space.$pref if $inl; $inl = 1;
  502         743  
2454 502         1134 compile2($v);
2455             }
2456 121         263 print $Fh $Eol_space.$pref0.')';
2457 121 50       303 print $Fh $Eol_space unless $Depth;
2458             } else { # print all entries to one line
2459 283         591 print $Fh '('.$Paren_space;
2460 283         584 foreach $v (@$data) {
2461 866 100       2069 print $Fh $Comma_punct if $inl; $inl = 1;
  866         995  
2462 866         1470 compile2($v);
2463             }
2464 283 50       726 print $Fh $Paren_space if $inl;
2465 283         3364 print $Fh ')';
2466             }
2467             } elsif ($Datatype == 72) { # 72 => 'H' => 'HASH'
2468 167         1390 my @keys = sort keys %$data;
2469 167 50       471 unless( @keys ) {
2470 0         0 print $Fh '{'.$Paren_space.'}';
2471             } else {
2472 167   66     703 my $manykeys = $Outline_data && @keys;
2473 167         420 my($pref0, $pref) = (comptab(0), comptab(1));
2474 167         342 foreach $k (@keys) {
2475 1007         1813 $v = $data->{$k};
2476 1007 100       2632 unless ($inl) {
2477 167 100 66     806 print $Fh $Eol_space.$pref0 if $Outline_hashes && $manykeys;
2478 167         386 print $Fh '{'.$Paren_space;
2479 167 100       2006 print $Fh $Eol_space if $manykeys; $inl = 1;
  167         255  
2480             }
2481 1007 100       8457 $k = $pref.(($k !~ $REValue) ? quote7($k) : $k);
2482 1007 100       2140 unless (defined($v)) {
2483 80         257 print $Fh $k.$Semicolon_punct.$Eol_space; # value is undef
2484             } else {
2485 927         1607 print $Fh $k.$Assign_punct;
2486 927         1719 compile2($v);
2487 927         2221 print $Fh $Semicolon_punct.$Eol_space;
2488             }
2489             }
2490 167 100       525 print $Fh $pref0 if $manykeys;
2491 167         292 print $Fh '}';
2492 167 100       1390 print $Fh $Eol_space unless $Depth;
2493             }
2494             } elsif ($Datatype == 82) { # 82 => 'R' => 'REF'
2495 0         0 compile2($$data)
2496             } elsif ($Datatype == 83) { # 83 => 'S' => 'SCALAR'
2497 0         0 print $Fh compval($$data);
2498             } elsif ($Datatype == 67) { # 67 => 'C' => 'CODE'
2499 80 50       144 if ($Code_refs) {
2500 0         0 compile2($data->())
2501             } else {
2502 80         188 print $Fh '"?CODE?"'
2503             }
2504             } else { # other reference: 'IO', 'GLOB' or 'FORMAT'
2505 0         0 print $Fh compval('?'.reftype($data).'?')
2506             }
2507 651         1080 $Depth--;
2508             } elsif (defined $data) { # $data is some scalar (not a ref)
2509 1731         3071 print $Fh compval($data);
2510             } else { # $data is undefined
2511 0         0 print $Fh DEFAULT_VALUE;
2512 2382         23864 } 1
2513             }
2514            
2515             sub compile_fast($)
2516             {
2517 81     81 1 170 my $data = shift;
2518 81         187 $R = ''; $Depth = -1; # reset result string
  81         122  
2519 81         239 compile_fast1($data); # return a string reference
2520 81         215 return \$R; # reference to the package-variable $Data::Rlist::R
2521             }
2522            
2523             sub compile_fast1($);
2524             sub compile_fast1($)
2525             {
2526             # Undefined values always are compiled into the default Rlist, the empty string.
2527             #
2528             # ord() returns 0 when reftype is undef, which it is for scalars. For any reference, blessed
2529             # or not, reftype returns "HASH", "ARRAY", "CODE" or "SCALAR". The $Datatype approach is
2530             # significantly faster than testing whether ref($data)=~'ARRAY' etc.
2531            
2532 642     642 0 797 my $data = $_[0];
2533            
2534 642 50       1216 if (ref $data) {
    0          
2535 642         2060 $Datatype = ord reftype $data;
2536 642         634 $Depth++;
2537 642 100       1390 if ($Datatype == 65) { # 65 => 'A' => 'ARRAY'
    100          
    50          
    50          
2538             # Open arrays in lines of their own, like we do also with hashes. The approach is fast
2539             # and compiles legible text. Lists of lists (matrices) then look nice.
2540            
2541 401 50       733 if (@$data) {
2542 401         842 $R.= chr(10).(chr(9) x $Depth).'(';
2543 401         502 my $in = 0;
2544 401         747 foreach (@$data) {
2545 1362 100       2592 unless ($in) { $in = 1 } else { $R.= ', ' }
  401         480  
  961         1148  
2546 1362 50       2133 if (defined) {
2547 1362 100       2339 if (ref) {
2548 240         480 compile_fast1($_)
2549             } else {
2550 1122 100       8546 $R.= $_ !~ $REValue ? quote7($_): $_
2551             }
2552 0         0 } else { $R.= DEFAULT_VALUE }
2553 401         682 } $R.= ')';
2554 0         0 } else { $R .= '()' }
2555             } elsif ($Datatype == 72) { # 72 => 'H' => 'HASH'
2556 161 50       337 if (%$data) {
2557 161         346 my $pref = chr(9) x $Depth;
2558            
2559             # Sorting is slightly slower than
2560             # while (($K, $V) = each %$data)
2561             # but produces nicer results. Note also that calling is_random_text is generally
2562             # faster than to always quote.
2563            
2564 161         240 $R.= "{\n";
2565 161         1108 foreach $K (sort keys %$data) {
2566 961         1579 $V = $data->{$K};
2567 961 100       6411 $K = quote7($K) if $K !~ $REValue;
2568 961         1642 $R.= $pref.chr(9).$K;
2569 961 100       1914 if (defined $V) {
2570 881         906 $R.= ' = ';
2571 881 100       1647 if (ref $V) {
2572 321         591 compile_fast1($V);
2573             } else {
2574 560 100       4263 $V = quote7($V) if $V !~ $REValue;
2575 560         1272 $R.= $V;
2576             }
2577 961         1525 } $R.= ";\n";
2578 161         400 } $R.= $pref.'}';
2579             } else {
2580 0         0 $R.= '{}'
2581             }
2582             } elsif ($Datatype == 82) { # 82 => 'R' => 'REF'
2583 0         0 compile_fast1($$data)
2584             } elsif ($Datatype == 83) { # 83 => 'S' => 'SCALAR'
2585 0 0       0 $R.= ($$data !~ $REValue) ? quote7($$data) : $$data;
2586             } else { # other reference: 'CODE', 'IO', 'GLOB' or 'FORMAT'
2587 80         258 $R.= '"?'.reftype($data).'?"'
2588             }
2589 642         1206 $Depth--;
2590             } elsif (defined $data) { # number or string
2591 0 0       0 $R.= ($data !~ $REValue) ? quote7($data) : $data;
2592             } else { # undef
2593 0         0 $R.= DEFAULT_VALUE;
2594             }
2595             }
2596            
2597             sub compile_Perl($)
2598             {
2599 0     0 1 0 my $data = shift;
2600 0         0 $R = ''; $Depth = -1; # reset result string
  0         0  
2601 0         0 compile_Perl1($data);
2602 0         0 return \$R;
2603             }
2604            
2605             sub compile_Perl1($);
2606             sub compile_Perl1($)
2607             {
2608 0     0 0 0 my $data = $_[0];
2609             sub __quote7($) {
2610 0     0   0 my $s = shift;
2611 0 0       0 return $s if $s =~ /^["']/;
2612 0         0 return quote7($s);
2613             }
2614            
2615 0 0       0 if (ref $data) {
    0          
2616 0         0 $Datatype = ord reftype $data;
2617 0         0 $Depth++;
2618 0 0       0 if ($Datatype == 65) {
    0          
    0          
    0          
2619 0 0       0 if (@$data) {
2620 0         0 $R.= chr(10).(chr(9) x $Depth).'[';
2621 0         0 my $in = 0;
2622 0         0 foreach (@$data) {
2623 0 0       0 unless ($in) { $in = 1 } else { $R.= ', ' }
  0         0  
  0         0  
2624 0 0       0 if (defined) {
2625 0 0       0 if (ref) {
2626 0         0 compile_Perl1($_)
2627             } else {
2628 0 0       0 $R.= is_number($_) ? $_ : __quote7($_)
2629             }
2630 0         0 } else { $R.= DEFAULT_VALUE }
2631 0         0 } $R.= ']';
2632 0         0 } else { $R .= '[]' }
2633             } elsif ($Datatype == 72) {
2634 0 0       0 if (%$data) {
2635 0         0 my $pref = chr(9) x $Depth;
2636 0         0 $R.= "{\n";
2637 0         0 foreach $K (sort keys %$data) {
2638 0         0 $V = $data->{$K};
2639 0 0       0 $K = __quote7($K) unless is_number($K);
2640 0         0 $R.= $pref.chr(9).$K;
2641 0 0       0 if (defined $V) {
2642 0         0 $R.= ' => ';
2643 0 0       0 if (ref $V) {
2644 0         0 compile_Perl1($V);
2645             } else {
2646 0 0       0 $V = __quote7($V) unless is_number($V);
2647 0         0 $R.= $V;
2648             }
2649 0         0 } $R.= ",\n";
2650 0         0 } $R.= $pref.'}';
2651             } else {
2652 0         0 $R.= '{}'
2653             }
2654             } elsif ($Datatype == 82) {
2655 0         0 compile_Perl1($$data)
2656             } elsif ($Datatype == 83) {
2657 0 0       0 $R.= is_number($data) ? $$data : __quote7($$data);
2658             } else {
2659 0         0 $R.= '"?'.reftype($data).'?"'
2660             }
2661 0         0 $Depth--;
2662             } elsif (defined $data) { # number or string
2663 0 0       0 $R.= is_number($data) ? $data : __quote7($data);
2664             } else { # undef
2665 0         0 $R.= DEFAULT_VALUE;
2666             }
2667             }
2668            
2669             =head2 Auxiliary Functions
2670            
2671             The utility functions in this section are generally useful when handling stringified data.
2672             Internally F>, F>, F> etc. apply precompiled regexes and
2673             precomputed ASCII tables. F> and F> simplify
2674             L. F> and F> are working solutions for floating-point
2675             numbers. F> is a smart function to "diff" two Perl variables. All these
2676             functions are very fast and mature.
2677            
2678             =over
2679            
2680             =item F
2681            
2682             Returns true when a scalar looks like a positive or negative integer constant. The function
2683             applies the compiled regex F<$Data::Rlist::REInteger>.
2684            
2685             =item F
2686            
2687             Test for strings that look like numbers. F can be used to test whether a scalar looks
2688             like a integer/float constant (numeric literal). The function applies the compiled regex
2689             F<$Data::Rlist::REFloat>. Note that it doesn't match
2690            
2691             - leading or trailing whitespace,
2692            
2693             - lexical conventions such as the C<"0b"> (binary), C<"0"> (octal), C<"0x"> (hex) prefix to denote
2694             a number-base other than decimal, and
2695            
2696             - Perls' legible numbers, e.g. F<3.14_15_92>,
2697            
2698             - the IEEE 754 notations of Infinite and NaN.
2699            
2700             See also
2701            
2702             $ perldoc -q "whether a scalar is a number"
2703            
2704             =item F
2705            
2706             Test for symbolic names. F can be used to test whether a scalar looks like a symbolic
2707             name. Such strings need not to be quoted. Rlist defines symbolic names as a superset of C
2708             identifier names:
2709            
2710             [a-zA-Z_0-9] # C/C++ character set for identifiers
2711             [a-zA-Z_0-9\-/\~:\.@] # Rlist character set for symbolic names
2712            
2713             [a-zA-Z_][a-zA-Z_0-9]* # match C/C++ identifier
2714             [a-zA-Z_\-/\~:@][a-zA-Z_0-9\-/\~:\.@]* # match Rlist symbolic name
2715            
2716             For example, names such as F, F, F<--verbose>, F need not
2717             be quoted.
2718            
2719             =item F
2720            
2721             Returns true when a scalar is an integer, a number, a symbolic name or some quoted string.
2722            
2723             =item F
2724            
2725             The opposite of F>. Such scalars will be turned into quoted strings by F>
2726             and F>.
2727            
2728             =cut
2729            
2730 7338 100   7338 1 8394 sub is_integer(\$) { ${$_[0]} =~ $REInteger ? 1 : 0 }
  7338         62722  
2731 31676 100   31676 1 42390 sub is_number(\$) { ${$_[0]} =~ $REFloat ? 1 : 0 }
  31676         282936  
2732 0 0   0 1 0 sub is_symbol(\$) { ${$_[0]} =~ $RESymbol ? 1 : 0 }
  0         0  
2733 8 100   8 1 6316 sub is_value(\$) { ${$_[0]} =~ $REValue ? 1 : 0 }
  8         173  
2734 1164 100   1164 1 1246 sub is_random_text(\$) { ${$_[0]} =~ $REValue ? 0 : 1 }
  1164         13395  
2735            
2736             =item F
2737            
2738             =item F
2739            
2740             Converts TEXT into 7-bit-ASCII. All characters not in the set of the 95 printable ASCII characters
2741             are escaped. The following ASCII codes will be converted to escaped octal numbers, i.e. 3 digits
2742             prefixed by a slash:
2743            
2744             0x00 to 0x1F
2745             0x80 to 0xFF
2746             " ' \
2747            
2748             The difference between the two functions is that F additionally places TEXT into
2749             double-quotes. For example, Fher Mittag\n"')> returns C<"\"Fr\374her
2750             Mittag\n\"">, while F returns C<\"Fr\374her Mittag\n\">
2751            
2752             =item F
2753            
2754             Return F if F(TEXT)>; otherwise (TEXT defines a symbolic name or
2755             number) return TEXT.
2756            
2757             =item F
2758            
2759             Return F when TEXT is enclosed by double-quotes; otherwise returns TEXT.
2760            
2761             =item F
2762            
2763             =item F
2764            
2765             Reverses what F> and F> did with TEXT.
2766            
2767             =item F
2768            
2769             Combines recipes 1.11 and 1.12 from the Perl Cookbook. HERE-DOC-STRING shall be a
2770             L. The function checks whether each line
2771             begins with a common prefix, and if so, strips that off. If no prefix it takes the amount of
2772             leading whitespace found the first line and removes that much off each subsequent line.
2773            
2774             Unless COLUMNS is defined returns the new here-doc-string. Otherwise, takes the string and
2775             reformats it into a paragraph having no line more than COLUMNS characters long. FIRSTTAB will be
2776             the indent for the first line, DEFAULTTAB the indent for every subsequent line. Unless passed,
2777             FIRSTTAB and DEFAULTTAB default to the empty string C<"">.
2778            
2779             =cut
2780            
2781             our(%g_nonprintables_escaped, # keys are non-printable ASCII chars, values are escape sequences
2782             %g_escaped_nonprintables, # keys are escaped sequences, values are the non-printables
2783             $REnonprintable,
2784             $REescape_seq);
2785            
2786             BEGIN {
2787             # Perl should not use/require the same module twice. However, the die below may throw when
2788             # Rlist.pm is symlinked. (This is a mature package, and we experienced many scenarios with it
2789             # so far.) For example, when Rlist.pm is installed locally to ~/bin and ~/bin is in @INC, one
2790             # can say
2791             # use Rlist;
2792             # to read the package Data::Rlist. But in order to
2793             # use Data::Rlist;
2794             # as with the regularily installed version (from CPAN), one must create ~/bin/Data/Rlist.pm.
2795             # If this is a symlink to ~/bin/Rlist.pm the same file might be used twice by perl.
2796            
2797 10 50   10   83 croak "${\(__FILE__)} used/required twice" if %g_escaped_nonprintables;
  0         0  
2798            
2799             # Tabulate octalization. In previous versions escape7() was implemented so
2800             #
2801             # sub _octl {
2802             # $n = ord($1);
2803             # '\\'.($n >> 6).(($n >> 3) & 7).($n & 7);
2804             # }
2805             # s/([\x00-\x1F\x80-\xFF])/_octl()/ge # non-printables => \NNN
2806             #
2807             # which has now been optimized into
2808             #
2809             # s/$REnonprintable/$g_nonprintables_escaped{$1}/go
2810            
2811             sub escape_char($) {
2812 1600     1600 0 1749 my $c = ord($_[0]); # get number code, eg. 'ü' => 252
2813 1600         6858 '\\'.($c >> 6).(($c >> 3) & 7).($c & 7); # eg. 252 => \374
2814             }
2815            
2816             sub unescape_char($) { # w/o leading backslash
2817 1600     1600 0 6084 pack('C', oct($_[0])); # deoctalize eg. 11 => 9 => \t
2818             }
2819            
2820 10         67 $REescape_seq = qr/\\([0-7]{1,3}|[nrt"'\\])/;
2821 10         32 $REnonprintable = qr/([\x00-\x1F\x80-\xFF"'])/;
2822            
2823             # Build tables for non-printable ASCII chararacters.
2824            
2825 10         40 %g_nonprintables_escaped = map { chr($_) => escape_char(chr($_)) } (0x00..0x1F, 0x80..0xFF);
  1600         3072  
2826 10         582 my @v = values %g_nonprintables_escaped;
2827 10         33 foreach (@v) {
2828 1600 50       6724 s/^\\// or die;
2829 1600 50       3381 croak $_ if exists $g_escaped_nonprintables{$_};
2830 1600         2432 $g_escaped_nonprintables{$_} = unescape_char($_)
2831             }
2832            
2833 10 50       49 croak unless keys(%g_nonprintables_escaped) == (255 - 95);
2834 10 50       99 croak join(" ", keys %g_escaped_nonprintables) unless keys(%g_escaped_nonprintables) == (255 - 95);
2835             #croak sort keys %g_escaped_nonprintables;
2836            
2837             # Add \ " ' into the tables, which spares another s// call in escape and unescape for them. The
2838             # leading \ is alredy matched by $REescape_seq.
2839            
2840 10         26 $g_nonprintables_escaped{chr(34)} = qq(\\"); # " => \"
2841 10         21 $g_nonprintables_escaped{chr(39)} = qq(\\'); # ' => \'
2842            
2843 10         24 $g_escaped_nonprintables{chr(34)} = chr(34);
2844 10         18 $g_escaped_nonprintables{chr(39)} = chr(39);
2845 10         19 $g_escaped_nonprintables{chr(92)} = chr(92);
2846            
2847             # Add \r, \n and \t.
2848            
2849 10         19 if (1) {
2850 10         24 $g_nonprintables_escaped{chr( 9)} = qq(\\t); # \t => \\t
2851 10         82 $g_nonprintables_escaped{chr(10)} = qq(\\n); # \n => \\n
2852 10         20 $g_nonprintables_escaped{chr(13)} = qq(\\r); # \r => \\r
2853            
2854 10         24 $g_escaped_nonprintables{'t'} = chr( 9);
2855 10         20 $g_escaped_nonprintables{'n'} = chr(10);
2856 10         20130 $g_escaped_nonprintables{'r'} = chr(13);
2857             }
2858             }
2859            
2860 1164 100   1164 1 2120 sub maybe_quote7($) { is_random_text($_[0]) ? quote7($_[0]) : $_[0] }
2861 7752 100   7752 1 24552 sub maybe_unquote7($) { ord($_[0]) == 34 ? unquote7($_[0]) : $_[0] }
2862             sub quote7($) {
2863             # Escape, then add quotes. Note that the below expression is faster than qq.
2864 2641     2641 1 8413 '"'.escape7($_[0]).'"'
2865             }
2866            
2867             sub unquote7($) {
2868             # First remove quotes, then unescape. The below expression might look complicated; but it is
2869             # faster than to shift the string and apply s/^\"// and s/\"$// on it.
2870 792 50   792 1 2857 unescape7(ord($_[0]) == 34 ? substr($_[0], 1, length($_[0]) - 2) : $_[0])
2871             }
2872            
2873             sub escape7($) {
2874 4371 100   4371 1 6226 my $s = shift; return '' unless defined $s;
  4371         15519  
2875 4368         7786 $s =~ s/\\/\\\\/g; # has to happen first, because...
2876 4368         28970 $s =~ s/$REnonprintable/$g_nonprintables_escaped{$1}/gos; # ...this will intersperse more backslashes
2877 4368         16240 $s
2878             }
2879            
2880             sub unescape7($) {
2881 7023     7023 1 13764 my $s = shift;
2882 7023         39879 $s =~ s/$REescape_seq/$g_escaped_nonprintables{$1}/gos;
2883 7023         22738 $s
2884             }
2885            
2886             sub unhere($;$$$) {
2887             # Combines recipes 1.11 and 1.12.
2888 0     0 1 0 local $_ = shift;
2889 0         0 my($white, $leader); # common whitespace and common leading string
2890 0 0       0 if (/^\s*(?:([^\w\s]+)(\s*).*\n)(?:\s*\1\2?.*\n)+$/) {
2891 0         0 ($white, $leader) = ($2, quotemeta($1));
2892             } else {
2893 0         0 ($white, $leader) = (/^(\s+)/, '');
2894             }
2895 0         0 s/^\s*?$leader(?:$white)?//gm;
2896            
2897             # This is recipe 1.12
2898 0   0     0 my($columns, $firsttab, $deftab) = (shift, shift||'', shift||'');
      0        
2899 0 0       0 if ($columns) {
2900 10     10   11330 use Text::Wrap;
  10         76442  
  10         1740  
2901 0         0 $Text::Wrap::columns = $columns;
2902 0         0 return wrap($firsttab, $deftab, $_);
2903             } else {
2904 0         0 return $_;
2905             }
2906             }
2907            
2908             =item F
2909            
2910             =item F
2911            
2912             Divide the string INPUT into a list of strings. DELIMITER is a regular expression specifying where
2913             to split (default: C<'\s+'>). The functions won't split at DELIMITERs inside quotes, or which are
2914             backslashed.
2915            
2916             F works like F but additionally removes all quotes and backslashes
2917             from the splitted fields. Both functions effectively simplify the interface of
2918             F. In an array context they return a list of substrings, otherwise the count of
2919             substrings. An empty array is returned in case of unbalanced double-quotes, e.g.
2920             F)>.
2921            
2922             B
2923            
2924             sub split_and_list($) {
2925             print ($i++, " '$_'\n") foreach split_quoted(shift)
2926             }
2927            
2928             split_and_list(q("fee foo" bar))
2929            
2930             0 '"fee foo"'
2931             1 'bar'
2932            
2933             split_and_list(q("fee foo"\ bar))
2934            
2935             0 '"fee foo"\ bar'
2936            
2937             The default DELIMITER C<'\s+'> handles newlines. F)> returns
2938             S> and hence can be used to to split a large string of unF'd input
2939             lines into words:
2940            
2941             split_and_list("foo \r\n bar\n")
2942            
2943             0 'foo'
2944             1 'bar'
2945             2 ''
2946            
2947             The DELIMITER matches everywhere outside of quoted constructs, so in case of the default C<'\s+'>
2948             you may want to remove heading/trailing whitespace. Consider
2949            
2950             split_and_list("\nfoo")
2951             split_and_list("\tfoo")
2952            
2953             0 ''
2954             1 'foo'
2955            
2956             and
2957            
2958             split_and_list(" foo ")
2959            
2960             0 ''
2961             1 'foo'
2962             2 ''
2963            
2964             F additionally removes all quotes and backslashes from the splitted fields:
2965            
2966             sub parse_and_list($) {
2967             print ($i++, " '$_'\n") foreach parse_quoted(shift)
2968             }
2969            
2970             parse_and_list(q("fee foo" bar))
2971            
2972             0 'fee foo'
2973             1 'bar'
2974            
2975             parse_and_list(q("fee foo"\ bar))
2976            
2977             0 'fee foo bar'
2978            
2979             B
2980            
2981             String C<'field\ one "field\ two"'>:
2982            
2983             ('field\ one', '"field\ two"') # split_quoted
2984             ('field one', 'field two') # parse_quoted
2985            
2986             String C<'field\,one, field", two"'> with a DELIMITER of C<'\s*,\s*'>:
2987            
2988             ('field\,one', 'field", two"') # split_quoted
2989             ('field,one', 'field, two') # parse_quoted
2990            
2991             Split a large string F<$soup> (mnemonic: slurped from a file) into lines, at LF or CR+LF:
2992            
2993             @lines = split_quoted($soup, '\r*\n');
2994            
2995             Then transform all F<@lines> by correctly splitting each line into "naked" values:
2996            
2997             @table = map { [ parse_quoted($_, '\s*,\s') ] } @lines
2998            
2999             Here is some more complete code to parse a F<.csv>-file with quoted fields, escaped commas:
3000            
3001             open my $fh, "foo.csv" or die $!;
3002             local $/; # enable localized slurp mode
3003             my $content = <$fh>; # slurp whole file at once
3004             close $fh;
3005             my @lines = split_quoted($content, '\r*\n');
3006             die q(unbalanced " in input) unless @lines;
3007             my @table = map { [ map { parse_quoted($_, '\s*,\s') } ] } @lines
3008            
3009             In core this is what F> does. F> allows you to test what
3010             F> and F> return. For example, the following code shall never
3011             die:
3012            
3013             croak if deep_compare([split_quoted("fee fie foo")], ['fee', 'fie', 'foo']);
3014             croak if deep_compare( parse_quoted('"fee fie foo"'), 1);
3015            
3016             =cut
3017            
3018             sub split_quoted($;$) {
3019             # Split [0] at delimiter [1], returning a list of words/tokens. Delimiter defaults to '\s+'.
3020             #
3021             # We've to map the result of parse_line again to build the result. For "foo\nbar\n" parse_line
3022             # returns ('foo','bar',undef), not ('foo','bar',''). This may cause hard to track "Use of
3023             # uninitialized value..." warnings.
3024            
3025 10     10   13755 use Text::ParseWords;
  10         20554  
  10         2078  
3026 52 100 100 52 1 1296 return map { (defined) ? $_ : '' } parse_line($_[1]||'[\s]+', 1, $_[0])
  7758         323258  
3027             }
3028            
3029             sub parse_quoted($;$) {
3030 10     10   108 use Text::ParseWords;
  10         17  
  10         3552  
3031 2 50 50 2 1 404 return map { (defined) ? $_ : '' } parse_line($_[1]||'[\s]+', 0, $_[0])
  1         46  
3032             }
3033            
3034             =item F
3035            
3036             F> returns true if NUM1 and NUM2 are equal to PRECISION number of decimal places
3037             (default: 6). For details see F>.
3038            
3039             =item F
3040            
3041             Compare and round floating-point numbers NUM1 and NUM2 (as string- or number scalars).
3042            
3043             When the C<"precision"> compile option is defined, F> is called during compilation on all
3044             numbers.
3045            
3046             Normally F will return a number in fixed-point notation. When the package-global
3047             F<$Data::Rlist::RoundScientific> is true, however, F formats the number in either normal or
3048             exponential (scientific) notation, whichever is more appropriate for its magnitude. This differs
3049             slightly from fixed-point notation in that insignificant zeroes to the right of the decimal point
3050             are not included. Also, the decimal point is not included on whole numbers. For example,
3051             F(42)> does not return 42.000000, and F returns 0.12, not 0.120000.
3052            
3053             B
3054            
3055             One needs a function like F to compare floats, because IEEE 754 single- and double precision
3056             implementations are not absolute - in contrast to the numbers they actually represent. In all
3057             machines non-integer numbers are only an approximation to the numeric truth. In other words,
3058             they're not commutative. For example, given two floats F and F, the result of F might
3059             be different than that of F. For another example, it is a mathematical truth that F
3060             * a>, but not necessarily in a computer.
3061            
3062             Each machine has its own accuracy, called the F, which is the difference between 1
3063             and the smallest exactly representable number greater than one. Most of the time only floats can be
3064             compared that have been carried out to a certain number of decimal places. In general this is the
3065             case when two floats that result from a numeric operation are compared - but not two constants.
3066             (Constants are accurate through to lexical conventions of the language. The Perl and C syntaxes for
3067             numbers simply won't allow you to write down inaccurate numbers.)
3068            
3069             See also recipes 2.2 and 2.3 in the Perl Cookbook.
3070            
3071             B
3072            
3073             CALL RETURNS NUMBER
3074             ---- --------------
3075             round('0.9957', 3) 0.996
3076             round(42, 2) 42
3077             round(0.12) 0.120000
3078             round(0.99, 2) 0.99
3079             round(0.991, 2) 0.99
3080             round(0.99, 1) 1.0
3081             round(1.096, 2) 1.10
3082             round(+.99950678) 0.999510
3083             round(-.00057260) -0.000573
3084             round(-1.6804e-6) -0.000002
3085            
3086             =cut
3087            
3088             sub equal($$;$) {
3089 10011     10011 1 15061 my($a, $b, $prec) = @_;
3090 10011 50       27269 $prec = 6 unless defined $prec;
3091 10011         104002 sprintf("%.${prec}g", $a) eq sprintf("%.${prec}g", $b)
3092             }
3093            
3094             sub round($;$) {
3095             # Note that sprintf("%.6g\n", 2006073104) yields 2.00607e+09, which looses digits.
3096 7338 100   7338 1 9947 my $a = shift; return $a if is_integer($a);
  7338         14492  
3097 4566 100       7275 my $prec = shift; $prec = 6 unless defined $prec;
  4566         9305  
3098 4566 100       12805 return sprintf("%.${prec}g", $a) if $RoundScientific;
3099 3798         25511 return sprintf("%.${prec}f", $a);
3100             }
3101            
3102             =item F
3103            
3104             Compare and analyze two numbers, strings or references. Generates a list of messages describing
3105             exactly all unequal data. Hence, for any Perl data F<$a> and F<$b> one can assert:
3106            
3107             croak "$a differs from $b" if deep_compare($a, $b);
3108            
3109             When PRECISION is defined all numbers in A and B are F>'d before actually comparing them.
3110             When TRACE_FLAG is true traces progress.
3111            
3112             B
3113            
3114             Returns an array of messages, each describing unequal data, or data that cannot be compared because
3115             of type- or value-mismatching. The array is empty when deep comparison of A and B found no unequal
3116             numbers or strings, and only indifferent types.
3117            
3118             B
3119            
3120             The result is line-oriented, and for each mismatch it returns a single message. For a simple
3121             example,
3122            
3123             Data::Rlist::deep_compare(undef, 1)
3124            
3125             yields
3126            
3127             <> cmp <<1>> stop! 1st undefined, 2nd defined (1)
3128            
3129             =cut
3130            
3131             sub deep_compare($$;$$$);
3132             sub deep_compare($$;$$$)
3133             {
3134 10     10   62 use Scalar::Util qw/reftype blessed looks_like_number/;
  10         23  
  10         1521862  
3135            
3136 0   0 0 0 0 sub prind($@) { my $ind = shift||0; print STDERR chr(9) x $ind, join(' ', grep { defined } @_), chr(10) }
  0         0  
  0         0  
3137             #sub quot($) { my $s = shift; $s =~ s/([\n\r\t])/\\&ord($1)/ge; "'$s'" }
3138 4 100   4 0 9 sub quot($) { my $s = shift; defined($s) ? "'$s'" : 'undef' }
  4         68  
3139            
3140 15723     15723 1 17176 my(@R);
3141 15723         31688 my($a, $b, $prec, $dump, $ind) = @_;
3142 15723         44175 my($atp, $btp) = (reftype($a), reftype($b)); # undef, SCALAR, ARRAY or HASH
3143 15723         30078 my($anm, $bnm, $refs) = (0, 0, defined($atp));
3144 15723 50   2   62021 my $prefix = sub { quot($a).($anm ? ' == ' : ' cmp ').quot($b) };
  2         9  
3145             my($mismatch, $match) = sub { # use "lazy instantiation", so that this sub isn't compiled for
3146             # the majority of cases (when two values are equal)
3147 2     2   4 my $s = shift; eval 'push @R, $prefix->()."\tStop! ".$s; prind($ind, $R[$#R]) if $dump;'
  2         301  
3148 15723         60029 };
3149 15723 50   0   48816 $match = sub { my $s = shift; eval 'prind($ind, $prefix->(), $s)' } if $dump;
  0         0  
  0         0  
3150 15723   100     43183 $ind ||= 0;
3151            
3152 15723 100       31530 unless ($refs) { # unless $a is a reference
3153 13910 100       29216 unless (defined $a) {
3154 10         20 $atp = 'undef';
3155 10 100       29 if (defined $b) {
3156 2         5 $mismatch->('only 2nd defined');
3157             } else {
3158 8 50       19 $match->() if $dump; # both undef'd
3159 10         82 } return @R;
3160             } else {
3161 13900 50       29782 unless (defined $b) {
3162 0         0 $mismatch->('only 1st defined');
3163 0         0 return @R;
3164             }
3165 13900 100       28100 $atp = ($anm = is_number($a)) ? 'number' : 'string';
3166 13900 50 66     90086 $a = round($a, $prec) if $anm and defined $prec;
3167             }
3168             }
3169 15713 100       33650 unless (defined $btp) {
3170 13900 50       24039 unless (defined $b) {
3171 0         0 $btp = 'undef';
3172 0 0       0 if (defined $a) {
3173 0         0 $mismatch->('only 1st defined');
3174             } else {
3175 0 0       0 $match->() if $dump; # both undef'd
3176 0         0 } return @R;
3177             } else {
3178 13900 50       30214 unless (defined $a) {
3179 0         0 $mismatch->('only 2nd defined');
3180 0         0 return @R;
3181             }
3182 13900 100       26406 $btp = ($bnm = is_number($b)) ? 'number' : 'string';
3183 13900 50 66     70551 $b = round($b, $prec) if $bnm and defined $prec;
3184             }
3185             }
3186             #die unless defined $a && defined $b;
3187 15713 50       37960 if ($atp ne $btp) {
3188 0         0 $mismatch->("type-mismatch, $atp vs. $btp");
3189 0         0 return @R;
3190             }
3191            
3192             # At this point $a and $b have equal types.
3193 15713 100       27592 unless ($refs) { # compare numbers/strings
3194 13900 100       30425 if ($anm) {
    50          
    50          
3195 10011 50       60621 $prec = (defined $prec) ? " precision=$prec" : '';
3196 10011 50       24210 unless (equal($a, $b)) {
    50          
3197 0         0 $mismatch->($prec)
3198             } elsif ($dump) {
3199 0         0 $match->($prec)
3200             }
3201             } elsif ($a ne $b) {
3202 0         0 $mismatch->('unequal strings')
3203             } elsif ($dump) {
3204 0         0 $match->()
3205             } return @R
3206 13900         89549 } else { # deep-compare two references
3207 1813     15447   6361 my $recurse = sub($$) { deep_compare($_[0], $_[1], $prec, $dump, $ind + 1) };
  15447         40929  
3208 1813 50       3851 prind($ind, $prefix->()) if $dump;
3209 1813 50       9031 if ($atp eq 'SCALAR') { # two scalars refs
    100          
    50          
    0          
3210 0         0 push @R, $recurse->($$a, $$b);
3211             return @R
3212 0         0 } elsif ($atp eq 'HASH') { # two hashes
3213 597         1089 my $acnt = keys %$a;
3214 597         1027 my $bcnt = keys %$b;
3215 597 50       1435 unless ($acnt == $bcnt) {
3216 0         0 $mismatch->("different number of keys ($acnt, $bcnt)");
3217 0         0 return @R;
3218 597 50       1361 } return @R if $acnt == 0; # no keys
3219            
3220             # Although both hashes have an equal number of keys, make sure that the keys themselves
3221             # are equal, and only then compare values.
3222 597         1746 my @a_keys_missing = grep { not exists $b->{$_} } keys %$a;
  3004         6078  
3223 597         1625 my @b_keys_missing = grep { not exists $a->{$_} } keys %$b;
  3004         4910  
3224            
3225 597 50 33     3016 if (@a_keys_missing || @b_keys_missing) {
3226 0 0       0 $mismatch->('1st hash misses keys ('.join(', ', map { quote7($_) } @a_keys_missing).")") if @a_keys_missing;
  0         0  
3227 0 0       0 $mismatch->('2nd hash misses keys ('.join(', ', map { quote7($_) } @b_keys_missing).")") if @b_keys_missing;
  0         0  
3228 0         0 return @R;
3229             }
3230            
3231 597         1636 foreach (keys %$a) {
3232 3004 50       6151 prind($ind, "key '$_'") if $dump;
3233 3004         7403 push @R, $recurse->($a->{$_}, $b->{$_});
3234             }
3235             } elsif ($atp eq 'ARRAY') { # two arrays
3236 1216 50       2582 if ($#$a != $#$b) {
3237 0         0 $mismatch->("different array sizes: ${\(1+$#$a)} vs. ${\(1+$#$b)}")
  0         0  
  0         0  
3238             } else {
3239 1216         2647 for (0 .. $#$a) {
3240 12443 50       24527 prind($ind, "index [$_]") if $dump;
3241 12443         29692 push (@R, $recurse->($a->[$_], $b->[$_]))
3242             }
3243             }
3244             } elsif ($atp eq 'REF') {
3245             # Reference to reference.
3246 0         0 $recurse->($$a, $$b)
3247             } else {
3248 0         0 $mismatch->("cannot compare types $atp");
3249             }
3250 1813         11810 } return @R;
3251             }
3252            
3253             =item F
3254            
3255             Forks a process and waits for completion. The function will extract the exit-code, test whether
3256             the process died and prints status messages on F. F hence is a handy
3257             wrapper around the built-in F and F functions. Returns an array of three values:
3258            
3259             ($exit_code, $failed, $coredump)
3260            
3261             F<$exit_code> is -1 when the program failed to execute (e.g. it wasn't found or the current user
3262             has insufficient rights). Otherwise F<$exit_code> is between 0 and 255. When the program died on
3263             receipt of a signal (like F or F) then F<$signal> stores it. When F<$coredump> is
3264             true the program died and a F-file was written.
3265            
3266             =item F
3267            
3268             Concatenates and forms all TEXT strings into a symbolic name that can be used as a pathname.
3269             F is a useful function to concatenate strings and nearby converting all
3270             characters that do not qualify as filename-characters, into C<"_"> and C<"-">. The result cannot
3271             only be used as file- or URL name, but also (coinstantaneously) as hash key, database name etc.
3272            
3273             =back
3274            
3275             =cut
3276            
3277             sub fork_and_wait(@)
3278             {
3279 0     0 1 0 my $prog = shift;
3280 0         0 my($exit_code, $signal, $coredump);
3281 0         0 local $| = 1;
3282 0         0 system($prog, @_); # == 0 or die "\n\tfailed: $?";
3283 0 0       0 if ($? == -1) { # not found
    0          
3284 0         0 $exit_code = -1;
3285 0         0 print STDERR "\n\tfailed to execute program: $!\n";
3286             } elsif ($? & 127) { # died
3287 0         0 $exit_code = -1;
3288 0         0 $signal = ($? & 127);
3289 0         0 $coredump = ($? & 128);
3290 0 0       0 print STDERR "\n\tchild died with signal %d, %s core-dump\n", $signal, $coredump ? 'with' : 'without';
3291             } else { # ok
3292 0         0 $exit_code = $? >> 8;
3293 0 0       0 printf STDERR "\n\tchild exited with value %d\n", $exit_code, "\n" if $DEBUG;
3294             }
3295 0         0 return ($exit_code, $signal, $coredump)
3296             }
3297            
3298             sub synthesize_pathname(@)
3299             {
3300 0     0 1 0 my @s = @_;
3301 0         0 my($dch1, $dch2) = ('-', '_');
3302 0         0 join('_',
3303             map {
3304             # Unquote.
3305 0         0 s/^"(.+)"\z/$1/;
3306             # Escape all non-printables.
3307 0         0 $_ = escape7($_);
3308             # Undo \" \'
3309 0         0 s/\\(["'])/$1/go;
3310 0         0 s/[']/_/g;
3311 0         0 s/"(.+)"/$dch2$dch2$1$dch2$dch2/o; # "xxx" within string => __xxx__
3312             # Handle \NNN
3313 0         0 s/[\\]/0/g; # eg. \347 => 0347
3314             # Filename
3315 0         0 s/[\(\|\)\/:;]/$dch1/go; # ( | ) / : ; ==> -
3316 0         0 s/[\^<>:,;\"\$\s\?!\&\%\*]/$dch2/go; # ^ < > " $ ? ! & % * , ; : wsp => _
3317 0         0 s/^[\-\s]+|[\-\s]+\z//o;
3318 0         0 $_
3319             } @s
3320             )
3321             }
3322            
3323            
3324             =head2 Compile Options
3325            
3326             The format of the compiled text and the behavior of F> can be controlled by the OPTIONS
3327             parameter of F>, F> etc. The argument is a hash defining how the Rlist
3328             text shall be formatted. The following pairs are recognized:
3329            
3330             =over
3331            
3332             =item 'precision' =E PLACES
3333            
3334             Make F> round all numbers to PLACES decimal places, by calling F> on each
3335             scalar that L. By default PLACES is F, which means floats
3336             are not rounded.
3337            
3338             =item 'scientific' =E FLAG
3339            
3340             Causes F> to masquerade F<$Data::Rlist::RoundScientific>. See F>.
3341            
3342             =item 'code_refs' =E TOKEN
3343            
3344             Defines how F> shall treat F reference. Legal values for TOKEN are 0 (the
3345             default), C<"call"> and C<"deparse">.
3346            
3347             - 0 compiles subroutine references into the string C<"?CODE?">.
3348            
3349             - C<"call"> calls the code, then compiles the return value.
3350            
3351             - C<"deparse"> serializes the code using F (reproducing the Perl source).
3352            
3353             =item 'threads' =E COUNT
3354            
3355             If enabled F> internally use multiple threads. Note that can speedup compilation only
3356             on machines with at least COUNT CPUs.
3357            
3358             =item 'here_docs' =E FLAG
3359            
3360             If enabled strings with at least two newlines in them are written as
3361             L, when possible. To qualify as here-document a string has to have
3362             at least two LFs (C<"\n">), one of which must terminate it.
3363            
3364             =item 'auto_quote' =E FLAG
3365            
3366             When true (default) do not quote strings that look like identifiers (see F>). When
3367             false quote F strings. Hash keys are not affected.
3368            
3369             F> and F> interpret this flag differently: false means not to quote at
3370             all; true quotes only strings that don't look like numbers and that aren't yet quoted.
3371            
3372             =item 'outline_data' =E NUMBER
3373            
3374             When NUMBER is greater than 0 use C<"eol_space"> (linefeed) to split data to many lines. It will
3375             insert a linefeed after every NUMBERth array value.
3376            
3377             =item 'outline_hashes' =E FLAG
3378            
3379             If enabled, and C<"outline_data"> is also enabled, prints F<{> and F<}> on distinct lines when
3380             compiling Perl hashes with at least one pair.
3381            
3382             =item 'separator' =E STRING
3383            
3384             The comma-separator string to be used by F>. The default is C<','>.
3385            
3386             =item 'delimiter' =E REGEX
3387            
3388             Field-delimiter for F>. There is no default value. To read configuration files, for
3389             example, you may use C<'\s*=\s*'> or C<'\s+'>. To read CSV-files use e.g. C<'\s*[,;]\s*'>.
3390            
3391             =back
3392            
3393             The following options format the generated Rlist; normally you don't want to modify them:
3394            
3395             =over
3396            
3397             =item 'bol_tabs' =E COUNT
3398            
3399             Count of physical, horizontal TAB characters to use at the begin-of-line per indentation
3400             level. Defaults to 1. Note that we don't use blanks, because they blow up the size of generated
3401             text without measure.
3402            
3403             =item 'eol_space' =E STRING
3404            
3405             End-of-line string to use (the linefeed). For example, legal values are C<"">, C<" ">, C<"\n">,
3406             C<"\r\n"> etc. The default is F, which means to use the current value of F<$/>. Note that
3407             this is a compile-option that only affects F>. When parsing files the builtin
3408             F function is called, which uses F<$/>.
3409            
3410             =item 'paren_space' =E STRING
3411            
3412             String to write after F<(> and F<{>, and before F<}> and F<)> when compiling arrays and hashes.
3413            
3414             =item 'comma_punct' =E STRING
3415            
3416             =item 'semicolon_punct' =E STRING
3417            
3418             Comma and semicolon strings, which shall be at least C<","> and C<";">. No matter what,
3419             F> will always print the C<"eol_space"> string after the C<"semicolon_punct"> string.
3420            
3421             =item 'assign_punct' =E STRING
3422            
3423             String to make up key/value-pairs. Defaults to C<" = ">.
3424            
3425             =back
3426            
3427             =head2 Predefined Options
3428            
3429             The L parameter accepted by some package functions is either a hash-ref
3430             or the name of a predefined set:
3431            
3432             =over
3433            
3434             =item 'default'
3435            
3436             Default if writing to a file.
3437            
3438             =item 'string'
3439            
3440             Compact, no newlines/here-docs. Renders a "string of data".
3441            
3442             =item 'outlined'
3443            
3444             Optimize the compiled Rlist for maximum readability.
3445            
3446             =item 'squeezed'
3447            
3448             Very compact, no whitespace at all. For very large Rlists.
3449            
3450             =item 'perl'
3451            
3452             Compile data in Perl syntax, using F>, not F>. The output then
3453             can be F'd, but it cannot be F> back.
3454            
3455             =item 'fast' or F
3456            
3457             Compile data as fast as possible, using F>, not F>.
3458            
3459             =back
3460            
3461             All functions that define an L parameter do implicitly call
3462             F> to complete the argument from one of the predefined sets, and additionally
3463             from C<"default">. Therefore you can always define nothing, or a "lazy subset of options". For
3464             example,
3465            
3466             my $obj = new Data::Rlist(-data => $thing);
3467            
3468             $obj->write('thing.rls', { scientific => 1, precision => 8 });
3469            
3470             =head2 Exports
3471            
3472             Example:
3473            
3474             use Data::Rlist qw/:floats :strings/;
3475            
3476             =head3 Exporter Tags
3477            
3478             =over
3479            
3480             =item F<:floats>
3481            
3482             Imports F>, F> and F>.
3483            
3484             =item F<:strings>
3485            
3486             Imports F>, F>, F>, F>, F>,
3487             F>, F>, F>, F>, F>, and
3488             F>.
3489            
3490             =item F<:options>
3491            
3492             Imports F> and F>.
3493            
3494             =item F<:aux>
3495            
3496             Imports F>, F> and F>.
3497            
3498             =back
3499            
3500             =head3 Auto-Exported Functions
3501            
3502             The following functions are implicitly imported into the callers symbol table. (But you may say
3503             F instead of F to prohibit auto-import. See also
3504             L.)
3505            
3506             =over
3507            
3508             =item F
3509            
3510             =item F
3511            
3512             =item F
3513            
3514             These are aliases for F>, F> and
3515             F>.
3516            
3517             =item F
3518            
3519             Like F> but implicitly call F> in case parsing
3520             was successful.
3521            
3522             =item F
3523            
3524             =item F
3525            
3526             =item F
3527            
3528             These are aliases for F>, F>
3529             F> and F>. OPTIONS default to C<"default">.
3530            
3531             =item F
3532            
3533             =item F
3534            
3535             =item F
3536            
3537             These are aliases for F>. F applies the
3538             predefined L|/Predefined Options> options, while F applies
3539             L|/Predefined Options> and F() L|/Predefined Options>. When
3540             specified, OPTIONS are merged into the. For example,
3541            
3542             print "\n\$thing: ", OutlineData($thing, { precision => 12 });
3543            
3544             F> all numbers in F<$thing> to 12 digits.
3545            
3546             =item F
3547            
3548             An alias for
3549            
3550             print OutlineData(DATA, OPTIONS);
3551            
3552             =item F
3553            
3554             =item F
3555            
3556             These are aliases for F> and F>. For example,
3557            
3558             use Data::Rlist;
3559             .
3560             .
3561             my($copy, $as_text) = KeelhaulData($thing);
3562            
3563             =back
3564            
3565             =cut
3566            
3567             sub ReadCSV($;$$$) {
3568 0     0 1 0 my($input, $options, $fcmd, $fcmdargs) = @_;
3569 0         0 return Data::Rlist::read_csv($input, $options, $fcmd, $fcmdargs);
3570             }
3571            
3572             sub ReadConf($;$$$) {
3573 0     0 1 0 my($input, $options, $fcmd, $fcmdargs) = @_;
3574 0         0 return Data::Rlist::read_conf($input, $options, $fcmd, $fcmdargs);
3575             }
3576            
3577             sub ReadData($;$$) {
3578 22     22 1 1455 my($input, $fcmd, $fcmdargs) = @_;
3579 22         63 return Data::Rlist::read($input, $fcmd, $fcmdargs);
3580             }
3581            
3582             sub EvaluateData($;$$) {
3583 1     1 1 2 my($input, $fcmd, $fcmdargs) = @_;
3584 1         5 my $result = ReadData($input, $fcmd, $fcmdargs);
3585 1         9 my $count = Data::Rlist::evaluate_nanoscripts();
3586 1         4 return $result;
3587             }
3588            
3589            
3590             sub WriteCSV($;$$$$) {
3591 0     0 1 0 my($data, $output, $options, $columns, $header) = @_;
3592 0   0     0 $options ||= 'default';
3593 0         0 Data::Rlist::write_csv($data, $output, $options, $columns, $header);
3594             }
3595            
3596             sub WriteConf($;$$$) {
3597 0     0 1 0 my($data, $output, $options, $header) = @_;
3598 0   0     0 $options ||= 'default';
3599 0         0 Data::Rlist::write_conf($data, $output, $options, $header);
3600             }
3601            
3602             sub WriteData($;$$$) {
3603 6     6 1 714 my($data, $output, $options, $header) = @_;
3604 6   100     28 $options ||= 'default'; # when undef uses 'default'
3605 6         21 Data::Rlist::write($data, $output, $options, $header);
3606             }
3607            
3608             sub PrintData($;$) { # return outlined data as string-value
3609 0     0 1 0 my($data, $options) = @_;
3610 0         0 print OutlineData($data, $options);
3611             }
3612            
3613             sub OutlineData($;$) { # return outlined data as string-ref
3614 3     3 1 8 my($data, $options) = @_;
3615 3         9 return Data::Rlist::write_string_value($data, complete_options($options, 'outlined'));
3616             }
3617            
3618             sub StringizeData($;$) { # return data as compact string-ref (no newlines)
3619 0     0 1 0 my($data, $options) = @_;
3620 0         0 return Data::Rlist::write_string_value($data, complete_options($options, 'string'));
3621             }
3622            
3623             sub SqueezeData($;$) { # return data as super-compact string-ref (no whitespace at all)
3624 0     0 1 0 my($data, $options) = @_;
3625 0         0 return Data::Rlist::write_string_value($data, complete_options($options, 'squeezed'));
3626             }
3627            
3628             sub KeelhaulData($;$) { # recursively copy data
3629 128     128 1 7067 my($data, $options) = @_;
3630 128         511 return Data::Rlist::keelhaul($data, $options);
3631             }
3632            
3633             sub CompareData($$;$$) { # recursively compare data
3634 267     267 1 10371 my($a, $b, $prec, $dump) = @_;
3635 267         1128 return Data::Rlist::deep_compare($a, $b, $prec, $dump);
3636             }
3637            
3638             =head1 EXAMPLES
3639            
3640             String- and number values:
3641            
3642             "Hello, World!"
3643             foo # compiles to { 'foo' => undef }
3644             3.1415 # compiles to { 3.1415 => undef }
3645            
3646             Array values:
3647            
3648             (1, a, 4, "b u z") # list of numbers/strings
3649            
3650             ((1, 2),
3651             (3, 4)) # list of list (4x4 matrix)
3652            
3653             ((1, a, 3, "foo bar"),
3654             (7, c, 0, "")) # another list of lists
3655            
3656             Here-document strings:
3657            
3658             $hello = ReadData(\<
3659             ( <
3660             Hallo Welt!
3661             DEUTSCH
3662             Hello World!
3663             ENGLISH
3664             Bonjour le monde!
3665             FRANCAIS
3666             Ola mundo!
3667             CASTELLANO
3668             ~ nuqneH { ~ 'u' ~ nuqneH disp disp } name
3669             nuqneH
3670             KLINGON
3671             ++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++
3672             ..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>.
3673             BRAINF_CK
3674             HELLO
3675            
3676             Compiles F<$hello> as
3677            
3678             [ "Hallo Welt!\n", "Hello World!\n", "Bonjour le monde!\n", "Ola mundo!\n",
3679             "~ nuqneH { ~ 'u' ~ nuqneH disp disp } name\n",
3680             "++++++++++[>+++++++>++++++++++>+++>+<<<<-]>++.>+.+++++++\n..+++.>++.<<+++++++++++++++.>.+++.------.--------.>+.>.\n" ]
3681            
3682             Configuration object as hash:
3683            
3684             {
3685             contribution_quantile = 0.99;
3686             default_only_mode = Y;
3687             number_of_runs = 10000;
3688             number_of_threads = 10;
3689             # etc.
3690             }
3691            
3692             Altogether:
3693            
3694             Metaphysic-terms =
3695             {
3696             Numbers =
3697             {
3698             3.141592653589793 = "The ratio of a circle's circumference to its diameter.";
3699             2.718281828459045 = <<___;
3700             The mathematical constant "e" is the unique real number such that the value of
3701             the derivative (slope of the tangent line) of f(x) = e^x at the point x = 0 is
3702             exactly 1.
3703             ___
3704             42 = "The Answer to Life, the Universe, and Everything.";
3705             };
3706            
3707             Words =
3708             {
3709             ACME = <
3710             A fancy-free Company [that] Makes Everything: Wile E. Coyote's supplier of equipment and gadgets.
3711             Value
3712             <
3713             foo bar foobar
3714             Key
3715             [JARGON] A widely used meta-syntactic variable; see foo for etymology. Probably
3716             originally propagated through DECsystem manuals [...] in 1960s and early 1970s;
3717             confirmed sightings go back to 1972. [...]
3718             Value
3719             };
3720             };
3721            
3722             =head1 NOTES
3723            
3724             The F (Rlist) syntax is inspired by NeXTSTEP's F. But Rlist is
3725             simpler, more readable and more portable. The Perl and C++ implementations are fast, stable and
3726             free. Markus Felten, with whom I worked a few month in a project at Deutsche Bank, Frankfurt in
3727             summer 1998, arrested my attention on Property lists. He had implemented a Perl variant of it
3728             (F>).
3729            
3730             The term "Random" underlines the fact that the language
3731            
3732             =over
3733            
3734             =item *
3735            
3736             has four primitive/anonymuous types;
3737            
3738             =item *
3739            
3740             the basic building block is a list, which is combined at random with other lists.
3741            
3742             =back
3743            
3744             Hence the term F does not mean F or F. F are
3745             F lists.
3746            
3747             =head1 F
3748            
3749             The main difference between F and F is that scalars will be properly
3750             encoded as number or string. F writes numbers always as quoted strings, for example
3751            
3752             $VAR1 = {
3753             'configuration' => {
3754             'verbose' => 'Y',
3755             'importance_sampling_loss_quantile' => '0.04',
3756             'distribution_loss_unit' => '100',
3757             'default_only' => 'Y',
3758             'num_threads' => '5',
3759             .
3760             .
3761             }
3762             };
3763            
3764             where F writes
3765            
3766             {
3767             configuration = {
3768             verbose = Y;
3769             importance_sampling_loss_quantile = 0.04;
3770             distribution_loss_unit = 100;
3771             default_only = Y;
3772             num_threads = 5;
3773             .
3774             .
3775             };
3776             }
3777            
3778             As one can see F writes the data right in Perl syntax, which means the dumped text
3779             can be simply F'd, and the data can be restored very fast. Rlists are not quite Perl-syntax:
3780             a dedicated parser is required. But therefore Rlist text is portable and can be read from other
3781             programming languages such as L.
3782            
3783             With F<$Data::Dumper::Useqq> enabled it was observed that F renders output
3784             significantly slower than F>. This is actually suprising, since F tests
3785             for each scalar whether it is numeric, and truely quotes/escapes strings. F quotes
3786             all scalars (including numbers), and it does not escape strings. This may also result in some odd
3787             behaviors. For example,
3788            
3789             use Data::Dumper;
3790             print Dumper "foo\n";
3791            
3792             yields
3793            
3794             $VAR1 = 'foo
3795             ';
3796            
3797             while
3798            
3799             use Data::Rlist;
3800             PrintData "foo\n"
3801            
3802             yields
3803            
3804             { "foo\n"; }
3805            
3806             Finally, F generates smaller files. With the default F<$Data::Dumper::Indent> of 2
3807             F's output is 4-5 times that of F's. This is because F
3808             recklessly uses blanks, instead of horizontal tabulators, which blows up file sizes without
3809             measure.
3810            
3811             =head2 Rlist vs. Perl Syntax
3812            
3813             Rlists are not Perl syntax:
3814            
3815             RLIST PERL
3816             ----- ----
3817             5; { 5 => undef }
3818             "5"; { "5" => undef }
3819             5=1; { 5 => 1 }
3820             {5=1;} { 5 => 1 }
3821             (5) [ 5 ]
3822             {} { }
3823             ; { }
3824             () [ ]
3825            
3826             =head2 Debugging Data
3827            
3828             To reduce recursive data structures (into true hierachies) set F<$Data::Rlist::MaxDepth> to an
3829             integer above 0. It then defines the depth under which F> shall not venture deeper.
3830             The compilation of Perl data (into Rlist text) then continues, but on F a message like the
3831             following is printed:
3832            
3833             ERROR: compile2() broken in deep ARRAY(0x101aaeec) (depth = 101, max-depth = 100)
3834            
3835             This message will also be repeated as comment when the compiled Rlist is written to a file.
3836             Furthermore F<$Data::Rlist::Broken> is incremented by one. While the compilation continues,
3837             effectively any attempt to venture deeper as suggested by F<$Data::Rlist::MaxDepth> will be
3838             blocked.
3839            
3840             See F>.
3841            
3842             =head2 Speeding up Compilation (Explicit Quoting)
3843            
3844             Much work has been spent to optimize F for speed. Still it is implemented in pure
3845             Perl (no XS). A rough estimation for Perl 5.8 is "each MB takes one second per GHz". For example,
3846             when the resulting Rlist file has a size of 13 MB, compiling it from a Perl script on a 3-GHz-PC
3847             requires about 5-7 seconds. Compiling the same data under Solaris, on a sparcv9 processor
3848             operating at 750 MHz, takes about 18-22 seconds.
3849            
3850             The process of compiling can be speed up by calling F> explicitly on scalars. That is,
3851             before calling F> or F>. Big data sets may compile faster when for
3852             scalars, that certainly not qualify as symbolic name, F> is called in advance:
3853            
3854             use Data::Rlist qw/:strings/;
3855            
3856             $data{quote7($key)} = $value;
3857             .
3858             .
3859             Data::Rlist::write("data.rlist", \%data);
3860            
3861             instead of
3862            
3863             $data{$key} = $value;
3864             .
3865             .
3866             Data::Rlist::write("data.rlist", \%data);
3867            
3868             It depends on the case whether the first variant is faster: F> and F>
3869             both have to call F> on each scalar. When the scalar is already quoted, i.e.,
3870             its first character is C<">, this test ought to run faster.
3871            
3872             Internally F> applies the precompiled regex F<$Data::Rlist::REValue>. Note that
3873             the expression S> can be up to 20% faster than the equivalent
3874             F.
3875            
3876             =head2 Quoting strings that look like numbers
3877            
3878             Normally you don't have to care about strings, since un/quoting happens as required when
3879             reading/compiling Rlist or CSV text. A common problem, however, occurs when some string uses the
3880             same lexicography than numbers do.
3881            
3882             Perl defines the string as the basic building block for all program data, then lets the program
3883             decide F. Analogical, in a printed book the reader has to decipher the glyphs
3884             and decide what evidence they hide. Printed text uses well-defined glyphs and typographic
3885             conventions, and finally the competence of the reader, to recognize numbers. But computers need to
3886             know the exact number type and format. Integer? Float? Hexadecimal? Scientific? Klingon? The
3887             Perl Cookbook recommends the use of a regular expression to distinguish number from string scalars
3888             (recipe 2.1).
3889            
3890             In Rlist, string scalars that look like numbers need to be quoted explicitly. Otherwise, for
3891             example, the string scalar C<"-3.14"> appears as F<-3.14> in the output, C<"007324"> is compiled
3892             into 7324 etc. Such text is lost and read back as a number. Of course, in most cases this is just
3893             what you want. For hash keys, however, it might be a problem. One solution is to prefix the string
3894             with C<"_">:
3895            
3896             my $s = '-9'; $s = "_$s";
3897            
3898             Such strings do not qualify as a number anymore. In the C++ implementation it will then become
3899             some F, not a F. But the leading C<"_"> has to be removed by the reading
3900             program. Perhaps a better solution is to explicitly call F>:
3901            
3902             use Data::Rlist qw/:strings/;
3903            
3904             $k = -9;
3905             $k = quote7($k); # returns qq'"-9"'
3906            
3907             $k = 3.14_15_92;
3908             $k = quote7($k); # returns qq'"3.141592"'
3909            
3910             Again, the need to quote strings that look like numbers is a problem evident only in the Perl
3911             implementation of Rlist, since Perl is a language with weak types. With the C++ implementation of
3912             Rlist there's no need to quote strings that look like numbers.
3913            
3914             See also F>, F>, F>, F> and
3915             F>.
3916            
3917             =head2 Installing F locally
3918            
3919             Installing CPAN packages usually requires administrator privileges. Another way is to copy the
3920             F file into a directory of your choice. Instead of F, however, you
3921             then use the following code. It will find F also in F<.> and F<~/bin>, and it calls the
3922             F explicitly:
3923            
3924             BEGIN {
3925             $0 =~ /[^\/]+$/;
3926             push @INC, $`||'.', "$ENV{HOME}/bin";
3927             require Rlist;
3928             Data::Rlist->import();
3929             Data::Rlist->import(qw/:floats :strings/);
3930             }
3931            
3932             =head2 An Rlist-Mode for Emacs
3933            
3934             (define-generic-mode 'rlist-generic-mode
3935             (list "//" ?#)
3936             nil
3937             '(;; Punctuators
3938             ("\\([(){},;?=]\\)" 1 'cperl-array-face)
3939             ;; Numbers
3940             ("\\([-+]?[0-9]+\\(\\.[0-9]+\\)?[dDlL]?\\)" 1 'font-lock-constant-face)
3941             ;; Identifier names
3942             ("\\([-~A-Za-z_][-~A-Za-z0-9_]+\\)" 1 'font-lock-variable-name-face))
3943             (list "\\.[rR][lL][iI]?[sS]$")
3944             ;; Extra functions to setup mode.
3945             (list 'generic-bracket-support
3946             '(lambda()
3947             (require 'cperl-mode)
3948             ;;(hl-line-mode t) ; highlight cursor-line
3949             (local-set-key [?\t] (lambda()(interactive)(cperl-indent-command)))
3950             (local-set-key [?\M-q] 'fill-paragraph)
3951             (set-fill-column 100)))
3952             "Generic mode for Random Lists (Rlist) files.")
3953            
3954             =head2 Implementation Details
3955            
3956             =head3 Perl
3957            
3958             =head4 Package Dependencies
3959            
3960             F depends only on few other packages:
3961            
3962             Exporter
3963             Carp
3964             strict
3965             integer
3966             Sys::Hostname
3967             Scalar::Util # deep_compare() only
3968             Text::Wrap # unhere() only
3969             Text::ParseWords # split_quoted(), parse_quoted() only
3970            
3971             F is free of F<$&>, F<$`> or F<$'>. Reason: once Perl sees that you need one of these
3972             meta-variables anywhere in the program, it has to provide them for every pattern match. This may
3973             substantially slow your program (see also L).
3974            
3975             =head4 A Short Story of Typeglobs
3976            
3977             This is supplement information for F>, the function internally called by F>
3978             and F>. We will discuss why F>, F> and
3979             F> transliterate typeglobs and typeglob-refs into C<"?GLOB?">. This is an
3980             attempted explanation.
3981            
3982             B
3983            
3984             Perl uses a symbol table per package to map symbolic names like F to Perl values. Typeglob (aka
3985             glob) objects are complete symbol table entries, as hash values. The symbol table hash (F)
3986             is an ordinary hash, named like the package with two colons appended. In the package stash the
3987             symbol name is mapped to a memory address which holds the actual data of your program. In Perl we
3988             do not have real global values, only package globals. Any Perl code is always running in one
3989             package or another.
3990            
3991             The main symbol table's name is F<%main::>, or F<%::>. In the C implementation of the Perl
3992             interpreter, the main symbol is simply a global variable, called the F (default stash).
3993             The symbol F in stash F<%::> addresses the stash of package F, and the symbol
3994             F in the stash F<%::Data::> addresses the stash of package F.
3995            
3996             Typeglobs are an idiosyncracy of Perl: different types need only one stash entry, so that one
3997             symbol can name all types of Perl data (scalars, arrays, hashes) and nondata (functions, formats,
3998             I/O handles). The symbol F is mapped to the typeglob F<*x>. In the typeglob coexist the scalar
3999             F<$x>, the list F<@x>, the hash F<%x>, the code F<&x> and the I/O-handle or format specifieer F.
4000            
4001             Most of the time only one glob slot is used. Do typeglobs waste space then? Probably not.
4002             (Although some authors believe that.) Other script languages like (e.g.) Python is not forcing
4003             decoration characters -- the interpreter already knows the type. In terms of C, symbol table
4004             entries are then struct/union-combinations with a type field, a F field, a F field
4005             and so forth. Perl symbols follow a contrary design: globs are really pointer sets to low-level
4006             structs that hold numbers, strings etc. Naturally pointers to non-existing values are NULL, and so
4007             no type field is required. Perl interpreters can now implement fine-grained smart-pointers for
4008             reference-counting and copy-on-write, and must not necessarily handle abstract unions. In theory,
4009             the garbage-collector should have "increased recycling opportunities." We do know, for example,
4010             that F is very greedy with RAM: it almost never returns any memory to the operating system.
4011            
4012             Modifying F<$x> in a Perl program won't change F<%x>, because the typeglob F<*x> is interposed
4013             between the stash and the program's actual values for F<$x>, F<@x> etc. The sigil F<*> serves as
4014             wildcard for the other sigils F<%>, F<@>, F<$> and F<&>. (Hint: a F is a symbol "created for
4015             a specific magical purpose"; the name derives from the latin F = seal.)
4016            
4017             Typeglobs cannot be dissolved by F>, because when (e.g.) F<$x> and F<%x> are in use,
4018             the glob F<*x> does not return some useful value like
4019            
4020             (SCALAR => \$x, HASH => \@x)
4021            
4022             Typeglobs are also not interpolated in strings. F always plays the ball back. A
4023             typeglob-value is simply a string:
4024            
4025             $ perl -e '$x=1; @x=(1); print *x'
4026             *main::x
4027            
4028             $ perl -e 'print "*x is not interpolated"'
4029             *x is not interpolated
4030            
4031             $ perl -e '$x = "this"; print "although ".*x." could be a string"'
4032             although *main::x could be a string
4033            
4034             As one can see, even when only F<$x> is defined the F<*x> does not return its value. Typeglobs
4035             (stash entries) are arranged by F on the fly, even with the F pragma in effect:
4036            
4037             $ perl -e 'package nirvana; use strict; print *x'
4038             *nirvana::x
4039            
4040             Each typeglob is a full path into the F stashes, down from the F:
4041            
4042             $ perl -e 'print "*x is \"*main::x\"" if *x eq "*main::x"'
4043             *x is "*main::x"
4044            
4045             $ perl -e 'package nirvana; sub f { local *g=shift; print *g."=$g" }; package main; $x=42; nirvana::f(*x)'
4046             *main::x=42
4047            
4048             B
4049            
4050             In the C implementation of Perl, typeglobs have the struct-type F for "Glob value". Each F
4051             is merely a set of pointers to sub-objects for scalars, arrays, hashes etc. In Perl the special
4052             syntax F<*x{ARRAY}> accesses the array-sub-object, and is another way to say F<\@x>. But when
4053             applied to a typeglob as F<\*foo> it returns a typeglob-ref, or globref. So the Perl backslash
4054             operator C<\> works like the address-of operator C<&> in C.
4055            
4056             $ perl -e 'print *::'
4057             *main::main:: # ???
4058            
4059             $ perl -e '$x = 42; print $::{x}'
4060             *main::x # typeglob-value 'x' in the stash
4061            
4062             $ perl -e 'print \*::'
4063             GLOB(0x10010f08) # some globref
4064            
4065             Little do we know what happens inside F, when we assign REFs to typeglobs:
4066            
4067             $ perl -e '$x = 42; *x = \$x; print $x'
4068             42
4069             $ perl -e '$y = 42; *x = \$y; print $x'
4070             42
4071            
4072             In Perl4 you had to pass typeglob-refs to call functions by references (the backslash-operator was
4073             not yet "invented"). Since Perl5 saw the light of day, typeglob-refs can be considered as
4074             artefacts. Note, however, that these veterans are still faster than true references, because true
4075             references are themselves stored in a typeglob (as REF type) and so need to be dereferenced.
4076             Globrefs can be used directly (as F's) by F. For example,
4077            
4078             void f1 { my $bar = shift; ++$$bar }
4079             void f2 { local *bar = shift; ++$bar }
4080            
4081             f1(\$x); # increments $x
4082             f1(*x); # dto., but faster
4083            
4084             B
4085            
4086             Typeglob-aliases offer another interesting application for typeglobs. For example, S>
4087             aliases the symbol F in the current stash, so that F and F point to the same typeglob.
4088             This means that when you declare S> after casting the alias, F is F.
4089            
4090             This smells like a free lunch. The penalty, however, is that the F symbol cannot be easily
4091             removed from the stash. One way is to say F, wich temporarily assigns a new typeglob
4092             to F with all pointers zeroized:
4093            
4094             package nirvana;
4095            
4096             sub f { print $bar; }
4097             sub g { local *bar; $bar = 42; f(); }
4098            
4099             package main;
4100            
4101             nirvana::g();
4102            
4103             Running this code as Perl script prints the number assigned in F. F acts as a closure. The
4104             F-statement will put the F symbol temporarily into the package stash F<%::nirvana>,
4105             i.e., the same stash in which F and F exist. It will remove F when F returns.
4106            
4107             B<*foo{THINGS}s>
4108            
4109             The F<*x{NAME}> expression family is fondly called "the F<*foo{THING}> syntax":
4110            
4111             $scalarref = *x{SCALAR};
4112             $arrayref = *ARGV{ARRAY};
4113             $hashref = *ENV{HASH};
4114             $coderef = *handlers{CODE};
4115            
4116             $ioref = *STDIN{IO};
4117             $ioref = *STDIN{FILEHANDLE}; # same as *STDIN{IO}
4118            
4119             $globref = *x{GLOB};
4120             $globref = \*x; # same as *x{GLOB}
4121             $undef = *x{THIS_NAME_IS_NOT_SUPPORTED} # yields undef
4122            
4123             die unless defined *x{SCALAR}; # ok -> will not die
4124             die unless defined *x{GLOB}; # ok
4125             die unless defined *x{HASH}; # error -> will die
4126            
4127             When THINGs are accessed this way few rules apply. Firstofall, F<*foo{THING}s> are not hashes. The
4128             syntax is a stopgap:
4129            
4130             $ perl -e 'print \*x, *x{GLOB}, \*x{GLOB}'
4131             GLOB(0x100110b8)GLOB(0x100110b8)REF(0x1002e944)
4132            
4133             $ perl -e '$x=1; exists *x{GLOB}'
4134             exists argument is not a HASH or ARRAY element at -e line 1.
4135            
4136             Some F<*foo{THING}> is F if the requested THING hasn't been used yet. Only F<*foo{SCALAR}>
4137             returns an anonymous scalar-reference:
4138            
4139             $ perl -e 'print "nope" unless defined *foo{HASH}'
4140             nope
4141             $ perl -e 'print *foo{SCALAR}'
4142             SCALAR(0x1002e94c)
4143            
4144             In Perl5 it is still not possible to get a reference to an I/O-handle (file-, directory- or socket
4145             handle) using the backslash operator. When a function requires an I/O-handle you must therefore
4146             pass a globref. More precisely, it is possible to pass an F-reference, a typeglob or a
4147             typeglob-ref as the filehandle. This is obscure bot only for new Perl programmers.
4148            
4149             sub logprint($@) {
4150             my $fh = shift;
4151             print $fh map { "$_\n" } @_;
4152             }
4153            
4154             logprint(*STDOUT{IO}, 'foo'); # pass IO-handle -> IO::Handle=IO(0x10011b44)
4155             logprint(*STDOUT, 'bar'); # ok, pass typeglob-value -> '*main::STDOUT'
4156             logprint(\*STDOUT, 'bar'); # ok, pass typeglob-ref -> 'GLOB(0x10011b2c)'
4157             logprint(\*STDOUT{IO}, 'nope'); # ERROR -> won't accept 'REF(0x10010fe0)'
4158            
4159             It is very amusing that Perl, although refactoring UNIX in form of a language, does not make clear
4160             what a file- or socket-handle is. The global symbol STDOUT is actually an F object,
4161             which F had silently instantiated. To functions like F, however, you may pass an
4162             F, globname or globref.
4163            
4164             B
4165            
4166             As we saw we can access the Perl guts without using a scalpel. Suprisingly, it is also possible to
4167             touch the stashes themselves:
4168            
4169             $ perl -e '$x = 42; *x = $x; print *x'
4170             *main::42
4171            
4172             $ perl -e '$x = 42; *x = $x; print *42'
4173             *main::42
4174            
4175             By assigning the scalar value F<$x> to F<*x> we have demolished the stash (at least, logically):
4176             neither F<$42> nor F<$main::42> are accessible. Symbols like F<42> are invalid, because 42 is a
4177             numeric literal, not a string literal.
4178            
4179             $ perl -e '$x = 42; *x = $x; print $main::42'
4180            
4181             Nevertheless it is easy to confuse F this way:
4182            
4183             $ perl -e 'print *main::42'
4184             *main::42
4185            
4186             $ perl -e 'print 1*9'
4187             9
4188            
4189             $ perl -e 'print *9'
4190             *main::9
4191            
4192             $ perl -e 'print *42{GLOB}'
4193             GLOB(0x100110b8)
4194            
4195             $ perl -e '*x = 42; print $::{42}, *x'
4196             *main::42*main::42
4197            
4198             $ perl -v
4199             This is perl, v5.8.8 built for cygwin-thread-multi-64int
4200             (with 8 registered patches, see perl -V for more detail)
4201            
4202             Of course these behaviors are not reliable, and may disappear in future versions of F. In
4203             German you say "Schmutzeffekt" (dirt effect) for certain mechanical effects that occur
4204             non-intendedly, because machines and electrical circuits are not perfect, and so is software.
4205             However, "Schmutzeffekts" are neither bugs nor features; these are phenomenons.
4206            
4207             B
4208            
4209             Lexical variables (F variables) are not stored in stashes, and do not require typeglobs. These
4210             variables are stored in a special array, the F, assigned to each block, subroutine, and
4211             thread. These are really private variables, and they cannot be Fized. Each lexical variable
4212             occupies a slot in the scratchpad; hence is addressed by an integer index, not a symbol. F
4213             variables are like F variables in C. They're also faster than Fs, because they can be
4214             allocated at compile time, not runtime. Therefore you cannot declare F<*x> lexically:
4215            
4216             $ perl -e 'my(*x)'
4217             Can't declare ref-to-glob cast in "my" at -e line 1, near ");"
4218            
4219             Seel also the Perl man-pages L, L, L and L.
4220            
4221             =head3 C++
4222            
4223             In C++ we use a F/F scanner/parser combination to read Rlist language productions.
4224             The C++ parser generates an F (AST) of F, F,
4225             F and F values. Since each value is put into the AST, as separate object,
4226             we use a free store management that allows the allocation of huge amounts of tiny objects.
4227            
4228             We also use reference-counted smart-pointers, which allocate themselves on our fast free store. So
4229             RAM will not be fragmented, and the allocation of RAM is significantly faster than with the default
4230             process heap. Like with Perl, Rlist files can have hundreds of megabytes of data (!), and are
4231             processable in constant time, with constant memory requirements. For example, a 300 MB Rlist-file
4232             can be read from a C++ process which will not peak over 400-500 MB of process RAM.
4233            
4234             =head1 BUGS
4235            
4236             There are no known bugs, this package is stable. Deficiencies and TODOs:
4237            
4238             =over
4239            
4240             =item *
4241            
4242             The C<"deparse"> functionality for the C<"code_refs"> L has not
4243             yet been implemented.
4244            
4245             =item *
4246            
4247             The C<"threads"> L has not yet been implemented.
4248            
4249             =item *
4250            
4251             IEEE 754 notations of Infinite and NaN not yet implemented.
4252            
4253             =item *
4254            
4255             F> is experimental.
4256            
4257             =back
4258            
4259             =head1 COPYRIGHT/LICENSE
4260            
4261             Copyright 1998-2008 Andreas Spindler
4262            
4263             Maintained at CPAN (F>) and the author's site
4264             (F>). Please send mail to F.
4265            
4266             This library is free software; you can redistribute it and/or modify it under the same terms as
4267             Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have
4268             available.
4269            
4270             Contact the author for the C++ library at F.
4271            
4272             Thank you for your attention.
4273            
4274             =cut
4275            
4276             1;
4277            
4278             ### Local Variables:
4279             ### buffer-file-coding-system: iso-latin-1
4280             ### fill-column: 99
4281             ### End: