File Coverage

blib/lib/Config/JSON/Enhanced.pm
Criterion Covered Total %
statement 106 121 87.6
branch 37 48 77.0
condition 12 23 52.1
subroutine 10 10 100.0
pod 1 1 100.0
total 166 203 81.7


line stmt bran cond sub pod time code
1             package Config::JSON::Enhanced;
2              
3 14     14   1986575 use 5.010;
  14         197  
4 14     14   85 use strict;
  14         26  
  14         359  
5 14     14   89 use warnings;
  14         40  
  14         746  
6              
7             our $VERSION = '0.08';
8              
9 14     14   99 use strict;
  14         30  
  14         376  
10 14     14   74 use warnings;
  14         34  
  14         577  
11              
12             # which loads JSON::XS with a purel-perl JSON fallback
13 14     14   7351 use JSON;
  14         117930  
  14         93  
14              
15 14     14   7833 use Data::Roundtrip qw/json2perl perl2dump no-unicode-escape-permanently/;
  14         494329  
  14         107  
16              
17 14     14   1548 use Exporter; # we have our own import() don't import it
  14         51  
  14         22169  
18             our @ISA = qw(Exporter);
19             our @EXPORT = qw/
20             config2perl
21             /;
22              
23             # Convert enhanced JSON string into a Perl data structure.
24             # The input parameters hashref:
25             # * specify where is the content to be parsed via:
26             # 'filename',
27             # 'filehandle', or,
28             # 'string'
29             # * optional 'commentstyle' is a string of comma separated
30             # commentstyles (valid styles are C, CPP, shell)
31             # * optional 'variable-substitutions' is a hashref with
32             # keys as template variable names to be substutited
33             # inside the content with their corresponding values.
34             # For example {'xx' => 'hello'} will substitute
35             # <% xx %> with hello
36             # * optional 'remove-comments-in-strings' to remove comments from JSON strings
37             # (both keys and values), default is to KEEP anything inside a string
38             # even if it looks like comments we are supposed to remove (because string
39             # can be a bash script, for example).
40             # * optional 'debug' for setting verbosity, default is zero.
41             #
42             # It returns the created Perl data structure or undef on failure.
43             sub config2perl {
44 572   50 572 1 3394056 my $params = shift // {};
45              
46 572         1136 my $contents;
47 572 100 66     5053 if( exists($params->{'filename'}) && defined(my $infile=$params->{'filename'}) ){
    100 66        
    50 33        
48 19         52 my $fh;
49 19 50   7   756 if( ! open $fh, '<:encoding(UTF-8)', $infile ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, failed to open file '$infile' for reading, $!"; return undef }
  0         0  
  0         0  
  7         68  
  7         14  
  7         52  
50 19         11272 { local $/ = undef; $contents = <$fh> }; close $fh;
  19         109  
  19         749  
  19         911  
51             } elsif( exists($params->{'filehandle'}) && defined(my $fh=$params->{'filehandle'}) ){
52 11         31 { local $/ = undef; $contents = <$fh> }
  11         54  
  11         478  
53             # we are not closing the filehandle, it is caller-specified, so caller responsibility
54             } elsif( exists($params->{'string'}) && defined($params->{'string'}) ){
55 542         1126 $contents = $params->{'string'};
56             }
57 572 50       1468 if( ! defined $contents ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, one of 'filename', 'filehandle' or 'string' must be specified in the parameters hash as the source of the configuration contents."; return undef }
  0         0  
  0         0  
58              
59             my $debug = exists($params->{'debug'}) && defined($params->{'debug'})
60 572 50 33     1814 ? $params->{'debug'} : 0
61             ;
62              
63             my $commentstyle = exists($params->{'commentstyle'}) && defined($params->{'commentstyle'})
64 572 50 33     2242 ? $params->{'commentstyle'} : 'C'
65             ;
66              
67 572         1145 my ($tvop, $tvcl);
68 572 100 66     1471 if( exists($params->{'tags'}) && defined($params->{'tags'}) ){
69 13 50       74 if( ref($params->{'tags'}) ne 'ARRAY' ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, input parameter 'tags' must be an ARRAYref of exactly 2 items and not a ".ref($params->{'tags'})."."; return undef }
  0         0  
  0         0  
70 13 50       21 if( scalar(@{ $params->{'tags'} }) != 2 ){ warn __PACKAGE__.'::configfile2perl()'." (line ".__LINE__.") : error, input parameter 'tags' must be an ARRAYref of exactly 2 items and not ".scalar(@{ $params->{'tags'} })."."; return undef }
  13         37  
  0         0  
  0         0  
  0         0  
71 13         22 ($tvop, $tvcl) = @{ $params->{'tags'} };
  13         37  
72 559         1120 } else { $tvop = '<%'; $tvcl = '%>' }
  559         834  
73              
74             my $tsubs = exists($params->{'variable-substitutions'})
75 572 100       1255 ? $params->{'variable-substitutions'} : undef
76             ;
77              
78             # remove comments inside strings? default is NO, keep comments if inside strings
79             # because they may not be our comments (e.g. string contains a bash script)
80             my $remove_comments_in_strings = exists($params->{'remove-comments-in-strings'}) && defined($params->{'remove-comments-in-strings'})
81 572 100 66     1565 ? $params->{'remove-comments-in-strings'} : 0
82             ;
83              
84             # firstly, substitute templated variables if any
85             # with the user-specified data.
86             # This includes ANYTHNING in the input enhanced JSON including
87             # verbatim sections, keys, values, etc.
88             # The opening and closing tags of vars are user-specified
89             # and are NOT allowed to contain spaces in between
90             # (e.g. '< %' will not be matched if '<%' was specified)
91 572         1729 for my $ak (keys %$tsubs){
92 35         86 my $av = $tsubs->{$ak};
93 35 50       470 if( ($ak =~ /(?:\Q${tvop}\E)|(?:\Q${tvcl}\E)/) ){ warn __PACKAGE__.'::config2perl()'." (line ".__LINE__.") : error, variable names can not contain the specified opening ($tvop) and/or closing ($tvcl) variable name tags."; return undef }
  0         0  
  0         0  
94 35         953 $contents =~ s!\Q${tvop}\E\s*${ak}\s*\Q${tvcl}\E!${av}!g;
95             }
96             # this is a warning:
97             # we can not be sure if this <% xyz %> is part of the content or a forgotten templated variable
98 572 50       3640 if( $contents =~ /\Q${tvop}\E\s*!(:?(:?begin-verbatim-section)|(:?end-verbatim-section))\s*\Q${tvcl}\E/ ){ warn "--begin content:\n".$contents."\n--end content.\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : warning, there may still be remains of templated variables in the specified content, see above what remained after all template variables substitutions were done." }
  0         0  
99              
100             # secondly, remove the VERBATIM multiline sections and transform them.
101             # Comments inside the verbatim section will NOT BE touched.
102             # The only thing touched was the templated variables earlier
103             # it substitutes each verbatim section with a code
104             # then does the comments and then replaces the code with the verbatim section at the very end
105 572         1049 my @verbs;
106 572         1044 my $idx = 0;
107 572         4806 while( $contents =~ s/\Q${tvop}\E\s*begin-verbatim-section\s*\Q${tvcl}\E(.*?)\Q${tvop}\E\s*end-verbatim-section\s*\Q${tvcl}\E/"___my___verbatim-section-${idx}___my___"/s ){
108 72         251 my $vc = $1;
109             # remove from start and end of whole string newlines+spaces
110 72         434 $vc =~ s/^[\n\t ]+//;
111 72         978 $vc =~ s/[\n\t ]+$//;
112             # remove newlines followed by optional spaces at the beginning of each line
113 72         698 $vc =~ s/\n+[ \t]*/\\n/gs;
114             # escape all double quotes (naively)
115             # but not those which are already escaped (naively)
116 72         393 $vc =~ s/\\"/<%__abcQQxyz__%>/g;
117 72         468 $vc =~ s/"/\\"/g;
118 72         367 $vc =~ s/<%__abcQQxyz__%>/\\\\\\"/g;
119             # so echo "aa \"xx\""
120             # becomes echo \"aa \\\"xx\\\"\"
121 72         179 push @verbs, $vc;
122 72         1154 $idx++;
123             }
124              
125             # thirdly, replace all JSON strings (keys or values) with indexed markers
126             # so that their contained comments
127             # to be left intact after the comment substitution which will
128             # be done later on.
129 572         1059 my @stringsubs;
130 572 100       1330 if( $remove_comments_in_strings == 0 ){
131 564         875 $idx = 0;
132 564         7182 while( $contents =~ s/(?
133 4727         11675 push @stringsubs, $1;
134 4727         40234 $idx++;
135             }
136             }
137              
138             # thirdly, remove comments: 'shell' and/or 'C' and/or 'CPP'
139             # and/or multiple instances of 'custom()()'
140 572         1070 my $tc = $commentstyle;
141 572 100       2957 if( $tc =~ s/\bC\b//i ){
142 407         3992 $contents =~ s/\/\*(?:(?!\*\/).)*\*\/\n?//sg;
143             }
144 572 100       2725 if( $tc =~ s/\bCPP\b//i ){
145 422         1961 $contents =~ s/\/\*(?:(?!\*\/).)*\*\/\n?//sg;
146 422         2461 $contents =~ s!//.*$!!mg;
147             }
148 572 100       2322 if( $tc =~ s/\bshell\b//i ){
149             # TODO: we must also remove the newline left!
150 373         2496 $contents =~ s/#.*$//mg;
151             }
152              
153             # specify a custom comment style with required opening string
154             # and an optional closing
155             # e.g. custom(required)(optional), custom(<<)(>>) or custom(REM)()
156 572         2909 while( $tc =~ s/\bcustom\((.+?)\)\((.*?)\)//i ){
157             # mulitple custom(opening)(closing) commentstyle are allowed
158             # 'opening' and 'closing' can be any string
159             # And need not be balanced e.g. <<< and >>
160             # And can be the same e.g. <<< and <<<
161 1760         4340 my $coop = $1; my $cocl = $2;
  1760         2928  
162 1760 100       4627 if( $cocl =~ /^\s*$/ ){
163             # TODO: we must also remove the newline left!
164 385         3804 $contents =~ s/\Q${coop}\E.*$//mg;
165             } else {
166 1375         34268 $contents =~ s/\Q${coop}\E(?:(?!\Q${cocl}\E\s*).)*\Q${cocl}\E\s*\n?//sg;
167             }
168             }
169 572 50       1752 if( $tc =~ /[a-z]/i ){ warn __PACKAGE__.'::config2perl()'." (line ".__LINE__.") : error, comments style '${commentstyle}' was not understood, this is what was left after parsing it: '${tc}'."; return undef }
  0         0  
  0         0  
170              
171             # this is a warning:
172             # we can not be sure if this <% xyz %> is part of the content or a forgotten templated variable
173 572 100       2716 if( $contents =~ /\Q${tvop}\E.+?-verbatim-section\s*\Q${tvcl}\E/ ){ warn "--begin content:\n".$contents."\n--end content.\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : warning, there may still be remains of templated variables in the specified content, see above what remained after all verbatime sections were removed." }
  1         97  
174              
175 572 100       1387 if( $remove_comments_in_strings == 0 ){
176 564         830 $idx = 0;
177 564         1696 for($idx=scalar(@stringsubs);$idx-->0;){
178 4727         8995 my $astring = $stringsubs[$idx];
179 4727         67632 $contents =~ s/___my___EJSTRING\($idx\)___my___/"${astring}"/g
180             }
181             }
182              
183             # and now substitute the transformed verbatim sections back
184 572         1725 for($idx=scalar(@verbs);$idx-->0;){
185 72         1123 $contents =~ s/___my___verbatim-section-${idx}___my___/$verbs[$idx]/g;
186             }
187              
188 572 50       1177 if( $debug > 0 ){ warn $contents."\n\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : produced above standard JSON from enhanced JSON content." }
  0         0  
189              
190             # here $contents must contain standard JSON which we parse:
191 572         2102 my $inhash = json2perl($contents);
192 572 100       15427 if( ! defined $inhash ){ warn $contents."\n\n".__PACKAGE__.'::config2perl()'." (line ".__LINE__.") : error, call to ".'Data::Roundtrip::json2perl()'." has failed for above json string and comments style '${commentstyle}'."; return undef }
  1         37  
  1         10  
193 571         2615 return $inhash
194             }
195              
196             =pod
197              
198             =head1 NAME
199              
200             Config::JSON::Enhanced - JSON-based config with C/Shell-style comments, verbatim sections and variable substitutions
201              
202             =head1 VERSION
203              
204             Version 0.08
205              
206             =head1 SYNOPSIS
207              
208             This module provides subroutine C for parsing configuration content,
209             from files or strings, based on, what I call, "enhanced JSON" (see section
210             L for more details). Briefly, it is standard JSON which allows:
211              
212             =over 2
213              
214             =item * C-style, C-style, C-style or custom comments.
215              
216             =item * Template-style variables (e.g. C% appdir %E>)
217             which are substituted with user-specified data during parsing.
218              
219             =item * Verbatim sections which are a sort of here-doc for JSON,
220             allowing strings to span multiple
221             lines, to contain single and double quotes unescaped,
222             to contain template-style variables.
223              
224             =back
225              
226             This module was created because I needed to include
227             long shell scripts containing lots of quotes and newlines,
228             in a configuration file which started as JSON.
229              
230             The process is simple: so-called "enhanced JSON" is parsed
231             by L. Comments are removed, variables are
232             substituted, verbatim sections become one line again
233             and standard JSON is created. This is parsed with
234             L (via L) to
235             produce a Perl data structure which is returned.
236              
237             It has been tested with unicode data
238             (see C)
239             with success. But who knows ?!?!
240              
241             Here is an example:
242              
243             use Config::JSON::Enhanced;
244              
245             # simple "enhanced" JSON with comments in 3 styles: C,shell,CPP
246             my $configdata = <<'EOJ';
247             {
248             /* 'a' is ... */
249             "a" : "abc",
250             # b is ...
251             "b" : [1,2,3],
252             "c" : 12 // c is ...
253             }
254             EOJ
255             my $perldata = config2perl({
256             'string' => $configdata,
257             'commentstyle' => "C,shell,CPP",
258             });
259             die "call to config2perl() has failed" unless defined $perldata;
260             # the standard JSON:
261             # {"a" : "abc","b" : [1,2,3], "c" : 12}
262              
263              
264             # this "enhanced" JSON demonstrates the use of variables
265             # which will be substituted during the transformation to
266             # standard JSON with user-specified data.
267             # Notice that the opening and closing tags enclosing variable
268             # names can be customised using the 'tags' input parameter,
269             # so as to avoid clashes with content in the JSON.
270             my $configdata = <<'EOJ';
271             {
272             "d" : [1,2,<% tempvar0 %>],
273             "configfile" : "<%SCRIPTDIR%>/config/myapp.conf",
274             "username" : "<% username %>"
275             }
276             }
277             EOJ
278             my $perldata = config2perl({
279             'string' => $configdata,
280             'commentstyle' => "C,shell,CPP",
281             # optionally customise the tags enclosing the variables
282             # when you want to avoid clashes with other strings in JSON
283             #'tags' => ['<%', '%>'], # <<< these are the default values
284             # user-specified data to replace the variables in
285             # the "enhanced" JSON above:
286             'variable-substitutions' => {
287             'tempvar0' => 42,
288             'username' => getlogin(),
289             'SCRIPTDIR' => $FindBin::Bin,
290             },
291             });
292             die "call to config2perl() has failed" unless defined $perldata;
293             # the standard JSON
294             # (notice how all variables in <%...%> are now replaced):
295             # {"d" : [1,2,42],
296             # "username" : "yossarian",
297             # "configfile" : "/home/yossarian/B52/config/myapp.conf"
298             # }
299              
300              
301             # this "enhanced" JSON demonstrates "verbatim sections"
302             # the puprose of which is to make more readable JSON strings
303             # by allowing them to span over multiple lines.
304             # There is also no need for escaping double quotes.
305             # template variables (like above) will be substituted
306             # There will be no comments removal from the verbatim sections.
307             my $configdata = <<'EOJ';
308             {
309             "a" : <%begin-verbatim-section%>
310             This is a multiline
311             string
312             "quoted text" and 'quoted like this also'
313             will be retained in the string escaped.
314             White space from beginning and end will be chomped.
315            
316             <%end-verbatim-section%>
317             ,
318             "b" = 123
319             }
320             EOJ
321             my $perldata = config2perl({
322             'string' => $configdata,
323             'commentstyle' => "C,shell,CPP",
324             });
325             die "call to config2perl() has failed" unless defined $perldata;
326             # the standard JSON (notice that "a" value is in a single line,
327             # here printed broken for readability):
328             # {"a" :
329             # "This is a multiline\nstring\n\"quoted text\" and 'quoted like
330             # this also'\nwill be retained in the string escaped.\nComments
331             # will not be removed.\nWhite space from
332             # beginning and end will be chomped.",
333             # "b" : 123
334             # };
335              
336              
337             =head1 EXPORT
338              
339             =over 4
340              
341             =item * C is exported by default.
342              
343             =back
344              
345              
346             =head1 SUBROUTINES
347              
348             =head2 C
349              
350             my $ret = config2perl($params);
351             die unless defined $ret;
352              
353             Arguments:
354              
355             =over 4
356              
357             =item * C<$params> : a hashref of input parameters.
358              
359             =back
360              
361             Return value:
362              
363             =over 4
364              
365             =item * the parsed content as a Perl data structure
366             on success or C on failure.
367              
368             =back
369              
370             Given input content in L, this sub removes comments
371             (as per preferences via input parameters),
372             replaces all template variables, if any,
373             compacts L, if any, into a single-line
374             string and then parses
375             what remains as standard JSON into a Perl data structure
376             which is returned to caller. JSON parsing is done with
377             L, which uses L.
378              
379             Comments outside of JSON fields will always be removed,
380             otherwise JSON can not be parsed.
381              
382             Comments inside of JSON fields, keys, values, strings etc.
383             will not be removed unless input parameter C
384             is set to 1 by the caller.
385              
386             Comments (or what looks like comments with the current input parameters)
387             inside L will never be removed.
388              
389             The input content to-be-parsed can be specified
390             with one of the following input parameters (entries in the
391             C<$params>):
392              
393             =over 4
394              
395             =item * C : content is read from a file with this name.
396              
397             =item * C : content is read from a file which has already
398             been opened for reading by the caller.
399              
400             =item * C : content is contained in this string.
401              
402             =back
403              
404             Additionally, input parameters can contain the following keys:
405              
406             =over 4
407              
408             =item * C : specify what comment style(s) to be expected
409             in the input content (if any) as a B. For example
410             C<'C,CPP,shell,custom(EE)(EE),custom(REM)()'>.
411             These are the values it understands:
412              
413             =over 2
414              
415             =item * C : comments take the form of C-style comments which
416             are exclusively within C. For example C<* I am a comment */>.
417             This is the B if none specified.
418              
419             =item * C : comments can the the form of C++-style comments
420             which are within C or after C until the end of line.
421             For example C, C.
422              
423             =item * C : comments can be after C<#> until the end of line.
424             For example, C<# I am a comment to the end of line>.
425              
426             =item * C : comments are enclosed (or preceded) by custom,
427             user-specified tags. The form is C.
428             C is required. C is optional meaning that
429             the comment extends to the end of line (just like C comments).
430             For example CE)(EE)> or
431             C or C or CEEE)(EE)>.
432             C and C do not need to be of
433             the same character length as it is
434             obvious from the previous example. A word of warning:
435             the regex for identifying comments (and variables and verbatim sections)
436             has the custom tags escaped for special regex characters
437             (with the C<\Q ... \E> construct). So you are pretty safe in using
438             any character. Please report weird behaviour.
439              
440             =back
441              
442             =item * C : a hashref whose keys are
443             variable names as they occur in the input I content
444             and their corresponding values should substitute them. I,
445             can contain template variables in the form C% my-var-1 %E>. These
446             must be replaced with data which is supplied to the call of C
447             under the parameters key C, for example:
448            
449             config2perl({
450             "variable-substitutions" => {
451             "my-var-1" => 42,
452             "SCRIPTDIR" => "/home/abc",
453             },
454             "string" => '{"a":"<% my-var-1 %>", "b":"<% SCRIPTDIR %>/app.conf"}',
455             });
456              
457             Variable substitution will be performed in both
458             keys and values of the input JSON, including L.
459              
460             =item * C : by default no attempt
461             to remove what-looks-like-comments from JSON strings
462             (both keys and values). However, if this flag is set to
463             C<1> anything that looks like comments (as per the 'C'
464             parameter) will be removed from inside all JSON strings
465             (keys or values) unless they were part of verbatim section.
466              
467             This does not apply for the content verbatim sections.
468             What looks like comments to us, inside verbatim sections
469             will be left intact.
470              
471             For example consider the JSON string C<"hello/*a comment*/">
472             (which can be a key or a value). If C is
473             set to 1, then the JSON string will become C. If set to
474             0 (which is the default) it will be unchanged.
475              
476             =item * C : specify the opening and closing tags for template
477             variables and verbatim section as an ARRAYref of exactly 2 items (the
478             opening and the closing tags). By default the opening tag is C%>
479             and the closing tag is C<%E>. A word of warning:
480             the regex for identifying variables and verbatim sections (and comments)
481             has the custom tags escaped for special regex characters
482             (with the C<\Q ... \E> construct). So you are pretty safe in using
483             any character. Please report weird behaviour.
484              
485             If you set C [ '[::', '::]' ]>
486             then your template variables should look like this: C<{:: var1 ::]> and
487             verbatim sections like this: C<[:: begin-verbatim-section ::]>.
488              
489             =item * C : set this to a positive integer to increase verbosity
490             and dump debugging messages. Default is zero for zero verbosity.
491              
492             =back
493              
494             See section L for details on the format
495             of B I.
496              
497             C returns the parsed content as a Perl data structure
498             on success or C on failure.
499              
500              
501             =head1 ENHANCED JSON FORMAT
502              
503             This is JSON with added reasonable, yet completely ad-hoc, enhancements
504             (from my point of view).
505              
506             These enhancements are:
507              
508             =over 4
509              
510             =item * B:
511              
512             =over 2
513              
514             =item * C-style comments take the form of C-style comments which
515             are exclusively within C. For example C<* I am a comment */>
516              
517             =item * C-style comments can the the form of C++-style comments
518             which are within C or after C until the end of line.
519             For example C, C
520              
521             =item * C-style comments can be after C<#> until the end of line.
522             For example, C<# I am a comment to the end of line.>
523              
524             =item * comments with C, user-specified, opening and
525             optional closing tags
526             which allows fine-tuning the process of deciding on something being a
527             comment.
528              
529             =back
530              
531             =item * B