File Coverage

blib/lib/Config/JSON/Enhanced.pm
Criterion Covered Total %
statement 106 121 87.6
branch 37 48 77.0
condition 12 23 52.1
subroutine 10 10 100.0
pod 1 1 100.0
total 166 203 81.7


line stmt bran cond sub pod time code
1             package Config::JSON::Enhanced;
2              
3 13     13   1881674 use 5.010;
  13         191  
4 13     13   74 use strict;
  13         27  
  13         330  
5 13     13   69 use warnings;
  13         28  
  13         632  
6              
7             our $VERSION = '0.07';
8              
9 13     13   91 use strict;
  13         41  
  13         330  
10 13     13   84 use warnings;
  13         50  
  13         524  
11              
12             # which loads JSON::XS with a purel-perl JSON fallback
13 13     13   6427 use JSON;
  13         109359  
  13         127  
14              
15 13     13   7294 use Data::Roundtrip qw/json2perl perl2dump no-unicode-escape-permanently/;
  13         442695  
  13         97  
16              
17 13     13   1484 use Exporter; # we have our own import() don't import it
  13         33  
  13         20558  
18             our @ISA = qw(Exporter);
19             our @EXPORT = qw/
20             config2perl
21             /;
22              
23             # Convert enhanced JSON string into a Perl data structure.
24             # The input parameters hashref:
25             # * specify where is the content to be parsed via:
26             # 'filename',
27             # 'filehandle', or,
28             # 'string'
29             # * optional 'commentstyle' is a string of comma separated
30             # commentstyles (valid styles are C, CPP, shell)
31             # * optional 'variable-substitutions' is a hashref with
32             # keys as template variable names to be substutited
33             # inside the content with their corresponding values.
34             # For example {'xx' => 'hello'} will substitute
35             # <% xx %> with hello
36             # * optional 'remove-comments-in-strings' to remove comments from JSON strings
37             # (both keys and values), default is to KEEP anything inside a string
38             # even if it looks like comments we are supposed to remove (because string
39             # can be a bash script, for example).
40             # * optional 'debug' for setting verbosity, default is zero.
41             #
42             # It returns the created Perl data structure or undef on failure.
43             sub config2perl {
44 571   50 571 1 2980511 my $params = shift // {};
45              
46 571         979 my $contents;
47 571 100 66     4448 if( exists($params->{'filename'}) && defined(my $infile=$params->{'filename'}) ){
    100 66        
    50 33        
48 15         41 my $fh;
49 15 50   6   833 if( ! open $fh, '<:encoding(UTF-8)', $infile ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, failed to open file '$infile' for reading, $!"; return undef }
  0         0  
  0         0  
  6         55  
  6         12  
  6         61  
50 15         10114 { local $/ = undef; $contents = <$fh> }; close $fh;
  15         111  
  15         709  
  15         858  
51             } elsif( exists($params->{'filehandle'}) && defined(my $fh=$params->{'filehandle'}) ){
52 7         25 { local $/ = undef; $contents = <$fh> }
  7         35  
  7         368  
53             # we are not closing the filehandle, it is caller-specified, so caller responsibility
54             } elsif( exists($params->{'string'}) && defined($params->{'string'}) ){
55 549         1191 $contents = $params->{'string'};
56             }
57 571 50       1297 if( ! defined $contents ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, one of 'filename', 'filehandle' or 'string' must be specified in the parameters hash as the source of the configuration contents."; return undef }
  0         0  
  0         0  
58              
59             my $debug = exists($params->{'debug'}) && defined($params->{'debug'})
60 571 50 33     1709 ? $params->{'debug'} : 0
61             ;
62              
63             my $commentstyle = exists($params->{'commentstyle'}) && defined($params->{'commentstyle'})
64 571 50 33     2183 ? $params->{'commentstyle'} : 'C'
65             ;
66              
67 571         959 my ($tvop, $tvcl);
68 571 100 66     1448 if( exists($params->{'tags'}) && defined($params->{'tags'}) ){
69 1 50       18 if( ref($params->{'tags'}) ne 'ARRAY' ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, input parameter 'tags' must be an ARRAYref of exactly 2 items and not a ".ref($params->{'tags'})."."; return undef }
  0         0  
  0         0  
70 1 50       2 if( scalar(@{ $params->{'tags'} }) != 2 ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, input parameter 'tags' must be an ARRAYref of exactly 2 items and not ".scalar(@{ $params->{'tags'} })."."; return undef }
  1         6  
  0         0  
  0         0  
  0         0  
71 1         2 ($tvop, $tvcl) = @{ $params->{'tags'} };
  1         4  
72 570         1058 } else { $tvop = '<%'; $tvcl = '%>' }
  570         896  
73              
74             my $tsubs = exists($params->{'variable-substitutions'})
75 571 100       1174 ? $params->{'variable-substitutions'} : undef
76             ;
77              
78             # remove comments inside strings? default is NO, keep comments if inside strings
79             # because they may not be our comments (e.g. string contains a bash script)
80             my $remove_comments_in_strings = exists($params->{'remove-comments-in-strings'}) && defined($params->{'remove-comments-in-strings'})
81 571 100 66     1454 ? $params->{'remove-comments-in-strings'} : 0
82             ;
83              
84             # firstly, substitute templated variables if any
85             # with the user-specified data.
86             # This includes ANYTHNING in the input enhanced JSON including
87             # verbatim sections, keys, values, etc.
88             # The opening and closing tags of vars are user-specified
89             # and are NOT allowed to contain spaces in between
90             # (e.g. '< %' will not be matched if '<%' was specified)
91 571         1757 for my $ak (keys %$tsubs){
92 11         32 my $av = $tsubs->{$ak};
93 11 50       219 if( ($ak =~ /(?:\Q${tvop}\E)|(?:\Q${tvcl}\E)/) ){ warn __PACKAGE__.'::config2perl()'." (line ".__LINE__.") : error, variable names can not contain the specified opening ($tvop) and/or closing ($tvcl) variable name tags."; return undef }
  0         0  
  0         0  
94 11         391 $contents =~ s!\Q${tvop}\E\s*${ak}\s*\Q${tvcl}\E!${av}!g;
95             }
96             # this is a warning:
97             # we can not be sure if this <% xyz %> is part of the content or a forgotten templated variable
98 571 50       3035 if( $contents =~ /\Q${tvop}\E!(:?(:?begin-verbatim-section)|(:?end-verbatim-section))%*>/ ){ warn "--begin content:\n".$contents."\n--end content.\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : warning, there may still be remains of templated variables in the specified content, see above what remained after all template variables substitutions were done." }
  0         0  
99              
100             # secondly, remove the VERBATIM multiline sections and transform them.
101             # Comments inside the verbatim section will NOT BE touched.
102             # The only thing touched was the templated variables earlier
103             # it substitutes each verbatim section with a code
104             # then does the comments and then replaces the code with the verbatim section at the very end
105 571         1084 my @verbs;
106 571         1017 my $idx = 0;
107 571         4816 while( $contents =~ s/\Q${tvop}\E*begin-verbatim-section\s*\Q${tvcl}\E(.+?)\Q${tvop}\E*end-verbatim-section\s*\Q${tvcl}\E/"${tvop}verbatim-section-${idx}${tvcl}"/s ){
108 60         225 my $vc = $1;
109             # remove from start and end of whole string newlines+spaces
110 60         381 $vc =~ s/^[\n\t ]+//;
111 60         895 $vc =~ s/[\n\t ]+$//;
112             # remove newlines followed by optional spaces at the beginning of each line
113 60         618 $vc =~ s/\n+[ \t]*/\\n/gs;
114             # escape all double quotes (naively)
115             # but not those which are already escaped (naively)
116 60         385 $vc =~ s/\\"/<%__abcQQxyz__%>/g;
117 60         460 $vc =~ s/"/\\"/g;
118 60         325 $vc =~ s/<%__abcQQxyz__%>/\\\\\\"/g;
119             # so echo "aa \"xx\""
120             # becomes echo \"aa \\\"xx\\\"\"
121 60         140 push @verbs, $vc;
122 60         1072 $idx++;
123             }
124              
125             # thirdly, replace all strings with indexed markers
126             # so that their contained comments
127             # to be left intact after the comment substitution which will
128             # be done later on.
129 571         968 my @stringsubs;
130 571 100       1252 if( $remove_comments_in_strings == 0 ){
131 563         872 $idx = 0;
132 563         7068 while( $contents =~ s/(?
133 4767         11726 push @stringsubs, $1;
134 4767         38184 $idx++;
135             }
136             }
137              
138             # thirdly, remove comments: 'shell' and/or 'C' and/or 'CPP'
139             # and/or multiple instances of 'custom()()'
140 571         1080 my $tc = $commentstyle;
141 571 100       3003 if( $tc =~ s/\bC\b//i ){
142 432         4679 $contents =~ s/\/\*(?:(?!\*\/).)*\*\/\n?//sg;
143             }
144 571 100       2605 if( $tc =~ s/\bCPP\b//i ){
145 379         1585 $contents =~ s/\/\*(?:(?!\*\/).)*\*\/\n?//sg;
146 379         2196 $contents =~ s!//.*$!!mg;
147             }
148 571 100       2300 if( $tc =~ s/\bshell\b//i ){
149             # TODO: we must also remove the newline left!
150 399         2800 $contents =~ s/#.*$//mg;
151             }
152              
153             # specify a custom comment style with required opening string
154             # and an optional closing
155             # e.g. custom(required)(optional), custom(<<)(>>) or custom(REM)()
156 571         2885 while( $tc =~ s/\bcustom\((.+?)\)\((.*?)\)//i ){
157             # mulitple custom(opening)(closing) commentstyle are allowed
158             # 'opening' and 'closing' can be any string
159             # And need not be balanced e.g. <<< and >>
160             # And can be the same e.g. <<< and <<<
161 1919         4629 my $op = $1; my $cl = $2;
  1919         3022  
162 1919 100       5067 if( $cl =~ /^\s*$/ ){
163             # TODO: we must also remove the newline left!
164 391         3348 $contents =~ s/${op}.*$//mg;
165             } else {
166 1528         33095 $contents =~ s/${op}(?:(?!${cl}).)*${cl}\n?//sg;
167             }
168             }
169 571 50       1679 if( $tc =~ /[a-z]/i ){ warn __PACKAGE__.'::config2perl()'." (line ".__LINE__.") : error, comments style '${commentstyle}' was not understood, this is what was left after parsing it: '${tc}'."; return undef }
  0         0  
  0         0  
170              
171             # this is a warning:
172             # we can not be sure if this <% xyz %> is part of the content or a forgotten templated variable
173 571 100       2762 if( $contents =~ /\Q${tvop}\E.+?-verbatim-section\s*\Q${tvcl}\E/ ){ warn "--begin content:\n".$contents."\n--end content.\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : warning, there may still be remains of templated variables in the specified content, see above what remained after all verbatime sections were removed." }
  1         91  
174              
175 571 100       1364 if( $remove_comments_in_strings == 0 ){
176 563         838 $idx = 0;
177 563         1629 for($idx=scalar(@stringsubs);$idx-->0;){
178 4767         9114 my $astring = $stringsubs[$idx];
179 4767         60546 $contents =~ s/___EJSTRING\($idx\)___/"${astring}"/g
180             }
181             }
182              
183             # and now substitute the transformed verbatim sections back
184 571         1738 for($idx=scalar(@verbs);$idx-->0;){
185 60         1071 $contents =~ s/\Q${tvop}\Everbatim-section-${idx}\Q${tvcl}\E/$verbs[$idx]/g;
186             }
187              
188 571 50       1254 if( $debug > 0 ){ warn $contents."\n\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : produced above standard JSON from enhanced JSON content." }
  0         0  
189              
190             # here $contents must contain standard JSON which we parse:
191 571         2035 my $inhash = json2perl($contents);
192 571 100       15173 if( ! defined $inhash ){ warn $contents."\n\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : error, call to ".'Data::Roundtrip::json2perl()'." has failed for above json string and comments style '${commentstyle}'."; return undef }
  1         18  
  1         8  
193 570         2668 return $inhash
194             }
195              
196             =pod
197              
198             =head1 NAME
199              
200             Config::JSON::Enhanced - JSON-based config with C/Shell-style comments, verbatim sections and variable substitutions
201              
202             =head1 VERSION
203              
204             Version 0.07
205              
206             =head1 SYNOPSIS
207              
208             This module provides subroutine C for parsing configuration content,
209             from files or strings, based on, what I call, "enhanced JSON" (see section
210             L for more details). Briefly, it is standard JSON which allows:
211              
212             =over 2
213              
214             =item * C-style, C-style, C-style or custom comments.
215              
216             =item * Template-style variables (e.g. C% appdir %E>)
217             which are substituted with user-specified data during parsing.
218              
219             =item * Verbatim sections which are a sort of here-doc for JSON,
220             allowing strings to span multiple
221             lines, to contain single and double quotes unescaped,
222             to contain template-style variables.
223              
224             =back
225              
226             This module was created because I needed to include
227             long shell scripts containing lots of quotes and newlines,
228             in a configuration file which started as JSON.
229              
230             The process is simple: so-called "enhanced JSON" is parsed
231             by L. Comments are removed, variables are
232             substituted, verbatim sections become one line again
233             and standard JSON is created. This is parsed with
234             L (via L) to
235             produce a Perl data structure which is returned.
236              
237             It has been tested with unicode data
238             (see C)
239             with success. But who knows ?!?!
240              
241             Here is an example:
242              
243             use Config::JSON::Enhanced;
244              
245             # simple "enhanced" JSON with comments in 3 styles: C,shell,CPP
246             my $configdata = <<'EOJ';
247             {
248             /* 'a' is ... */
249             "a" : "abc",
250             # b is ...
251             "b" : [1,2,3],
252             "c" : 12 // c is ...
253             }
254             EOJ
255             my $perldata = config2perl({
256             'string' => $configdata,
257             'commentstyle' => "C,shell,CPP",
258             });
259             die "call to config2perl() has failed" unless defined $perldata;
260             # the standard JSON:
261             # {"a" : "abc","b" : [1,2,3], "c" : 12}
262              
263              
264             # this "enhanced" JSON demonstrates the use of variables
265             # which will be substituted during the transformation to
266             # standard JSON with user-specified data.
267             # Notice that the opening and closing tags enclosing variable
268             # names can be customised using the 'tags' input parameter,
269             # so as to avoid clashes.
270             my $configdata = <<'EOJ';
271             {
272             "d" : [1,2,<% tempvar0 %>],
273             "configfile" : "<%SCRIPTDIR%>/config/myapp.conf",
274             "username" : "<% username %>"
275             }
276             }
277             EOJ
278             my $perldata = config2perl({
279             'string' => $configdata,
280             'commentstyle' => "C,shell,CPP",
281             # optionally customise the tags enclosing the variables
282             #'tags' => ['<%', '%>'],
283             # user-specified data to replace the variables in
284             # the "enhanced" JSON above:
285             'variable-substitutions' => {
286             'tempvar0' => 42,
287             'username' => getlogin(),
288             'SCRIPTDIR' => $FindBin::Bin,
289             },
290             });
291             die "call to config2perl() has failed" unless defined $perldata;
292             # the standard JSON
293             # (notice how all variables in <%...%> are now replaced):
294             # {"d" : [1,2,42],
295             # "username" : "yossarian",
296             # "configfile" : "/home/yossarian/B52/config/myapp.conf"
297             # }
298              
299              
300             # this "enhanced" JSON demonstrates "verbatim sections"
301             # the puprose of which is to make more readable JSON strings
302             # by allowing them to span over multiple lines.
303             # There is also no need for escaping double quotes.
304             # template variables (like above) will be substituted
305             # There will be no comments removal from the verbatim sections.
306             my $configdata = <<'EOJ';
307             {
308             "a" : <%begin-verbatim-section%>
309             This is a multiline
310             string
311             "quoted text" and 'quoted like this also'
312             will be retained in the string escaped.
313             White space from beginning and end will be chomped.
314            
315             <%end-verbatim-section%>
316             ,
317             "b" = 123
318             }
319             EOJ
320             my $perldata = config2perl({
321             'string' => $configdata,
322             'commentstyle' => "C,shell,CPP",
323             });
324             die "call to config2perl() has failed" unless defined $perldata;
325             # the standard JSON (notice that "a" value is in a single line,
326             # here printed broken for readability):
327             # {"a" :
328             # "This is a multiline\nstring\n\"quoted text\" and 'quoted like
329             # this also'\nwill be retained in the string escaped.\nComments
330             # will not be removed.\nWhite space from
331             # beginning and end will be chomped.",
332             # "b" : 123
333             # };
334              
335              
336             =head1 EXPORT
337              
338             =over 4
339              
340             =item * C is exported by default.
341              
342             =back
343              
344              
345             =head1 SUBROUTINES
346              
347             =head2 C
348              
349             my $ret = config2perl($params);
350             die unless defined $ret;
351              
352             Arguments:
353              
354             =over 4
355              
356             =item * C<$params> : a hashref of input parameters.
357              
358             =back
359              
360             Return value:
361              
362             =over 4
363              
364             =item * the parsed content as a Perl data structure
365             on success or C on failure.
366              
367             =back
368              
369             Given input content in L, this sub removes comments
370             (as per preferences via input parameters),
371             replaces all template variables, if any,
372             compacts L, if any, into a single-line
373             string and then parses
374             what remains as standard JSON into a Perl data structure
375             which is returned to caller. JSON parsing is done with
376             L, which uses L.
377              
378             Comments outside of JSON fields will always be removed,
379             otherwise JSON can not be parsed.
380              
381             Comments inside of JSON fields, keys, values, strings etc.
382             will not be removed unless input parameter C
383             is set to 1 by the caller.
384              
385             Comments (or what looks like comments with the current input parameters)
386             inside L will never be removed.
387              
388             The input content to-be-parsed can be specified
389             with one of the following input parameters (entries in the
390             C<$params>):
391              
392             =over 4
393              
394             =item * C : content is read from a file with this name.
395              
396             =item * C : content is read from a file which has already
397             been opened for reading by the caller.
398              
399             =item * C : content is contained in this string.
400              
401             =back
402              
403             Additionally, input parameters can contain the following keys:
404              
405             =over 4
406              
407             =item * C : specify what comment style(s) to be expected
408             in the input content (if any) as a B. For example
409             C<'C,CPP,shell,custom(EE)(EE),custom(REM)()'>.
410             These are the values it understands:
411              
412             =over 2
413              
414             =item * C : comments take the form of C-style comments which
415             are exclusively within C. For example C<* I am a comment */>.
416             This is the B if none specified.
417              
418             =item * C : comments can the the form of C++-style comments
419             which are within C or after C until the end of line.
420             For example C, C.
421              
422             =item * C : comments can be after C<#> until the end of line.
423             For example, C<# I am a comment to the end of line>.
424              
425             =item * C : comments are enclosed (or preceded) by custom,
426             user-specified tags. The form is C.
427             C is required. C is optional meaning that
428             the comment extends to the end of line (just like C comments).
429             For example CE)(EE)> or
430             C or C or CEEE)(EE)>.
431             C and C do not need to be of
432             the same character length as it is
433             obvious from the previous example.
434              
435             =back
436              
437             =item * C : a hashref whose keys are
438             variable names as they occur in the input I content
439             and their corresponding values should substitute them. I,
440             can contain template variables in the form C% my-var-1 %E>. These
441             must be replaced with data which is supplied to the call of C
442             under the parameters key C, for example:
443            
444             config2perl({
445             "variable-substitutions" => {
446             "my-var-1" => 42,
447             "SCRIPTDIR" => "/home/abc",
448             },
449             "string" => '{"a":"<% my-var-1 %>", "b":"<% SCRIPTDIR %>/app.conf"}',
450             });
451              
452             Variable substitution will be performed in both
453             keys and values of the input JSON, including L.
454              
455             =item * C : by default no attempt
456             to remove what-looks-like-comments from JSON strings
457             (both keys and values). However, if this flag is set to
458             C<1> anything that looks like comments (as per the 'C'
459             parameter) will be removed from inside all JSON strings
460             (keys or values) unless they were part of verbatim section.
461              
462             This does not apply for the content verbatim sections.
463             What looks like comments to us, inside verbatim sections
464             will be left intact.
465              
466             For example consider the JSON string C<"hello/*a comment*/">
467             (which can be a key or a value). If C is
468             set to 1, then the JSON string will become C. If set to
469             0 (which is the default) it will be unchanged.
470              
471             =item * C : specify the opening and closing tags for template
472             variables and verbatim section as an ARRAYref of exactly 2 items (the
473             opening and the closing tags). By default the opening tag is C%>
474             and the closing tag is C<%E>.
475              
476             If you set C [ '[::', '::]' ]>
477             then your template variables should look like this: C<{:: var1 ::]> and
478             verbatim sections like this: C<[:: begin-verbatim-section ::]>.
479              
480             =item * C : set this to a positive integer to increase verbosity
481             and dump debugging messages. Default is zero for zero verbosity.
482              
483             =back
484              
485             See section L for details on the format
486             of B I.
487              
488             C returns the parsed content as a Perl data structure
489             on success or C on failure.
490              
491              
492             =head1 ENHANCED JSON FORMAT
493              
494             This is JSON with added reasonable, yet completely ad-hoc, enhancements
495             (from my point of view).
496              
497             These enhancements are:
498              
499             =over 4
500              
501             =item * B:
502              
503             =over 2
504              
505             =item * C-style comments take the form of C-style comments which
506             are exclusively within C. For example C<* I am a comment */>
507              
508             =item * C-style comments can the the form of C++-style comments
509             which are within C or after C until the end of line.
510             For example C, C
511              
512             =item * C-style comments can be after C<#> until the end of line.
513             For example, C<# I am a comment to the end of line.>
514              
515             =item * comments with C, user-specified, opening and
516             optional closing tags
517             which allows fine-tuning the process of deciding on something being a
518             comment.
519              
520             =back
521              
522             =item * B