File Coverage

lib/mb/JSON.pm
Criterion Covered Total %
statement 33 55 60.0
branch 28 36 77.7
condition n/a
subroutine 3 4 75.0
pod 0 2 0.0
total 64 97 65.9


line stmt bran cond sub pod time code
1             package mb::JSON;
2             ######################################################################
3             #
4             # mb::JSON - a simple JSON parser for multibyte string
5             #
6             # http://search.cpan.org/dist/mb-JSON/
7             #
8             # Copyright (c) 2021, 2022 INABA Hitoshi in a CPAN
9             ######################################################################
10              
11 1     1   3592 use 5.00503; # Universal Consensus 1998 for primetools
  1         4  
12             # use 5.008001; # Lancaster Consensus 2013 for toolchains
13              
14             $VERSION = '0.03';
15             $VERSION = $VERSION;
16              
17 1     1   6 use strict;
  1         1  
  1         979  
18              
19             #---------------------------------------------------------------------
20             # UTF-8
21             my $utf8 = join '', qw{
22             [\x00-\x7F\x80-\xBF\xC0-\xC1\xF5-\xFF] |
23             [\xC2-\xDF][\x80-\xBF] |
24             [\xE0-\xE0][\xA0-\xBF][\x80-\xBF] |
25             [\xE1-\xEC][\x80-\xBF][\x80-\xBF] |
26             [\xED-\xED][\x80-\x9F][\x80-\xBF] |
27             [\xEE-\xEF][\x80-\xBF][\x80-\xBF] |
28             [\xF0-\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF] |
29             [\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF] |
30             [\xF4-\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF] |
31             [\x00-\xFF]
32             };
33              
34             #---------------------------------------------------------------------
35             # confess() for this module
36             sub confess {
37 0     0 0 0 my $i = 0;
38 0         0 my @confess = ();
39 0         0 while (my($package,$filename,$line,$subroutine) = caller($i)) {
40 0         0 push @confess, "[$i] $filename($line) $subroutine\n";
41 0         0 $i++;
42             }
43 0         0 print STDERR "\n", @_, "\n";
44 0         0 print STDERR CORE::reverse @confess;
45 0         0 die;
46             }
47              
48             #---------------------------------------------------------------------
49             # parse JSON data
50             sub mb::JSON::parse {
51 42 50   42 0 1476 local $_ = @_ ? $_[0] : $_;
52 42 50       103 my $U0 = ($] =~ /^5\.006/) ? 'U0' : '';
53 42         59 my $parsed = '';
54 42         146 while (not /\G \z/xmsgc) {
55              
56             # beginning of JSON's string --> beginning of Perl's string
57 172 100       819 if (/\G (") /xmsgc) {
    100          
    100          
    100          
    50          
58 41         102 $parsed .= $1;
59              
60 41         52 while (1) {
61              
62             #-------------------------------------------------------------------------------
63             # end of JSON's string then ":" --> Perl's hash key
64             #-------------------------------------------------------------------------------
65             # An object structure is represented as a pair of curly brackets
66             # surrounding zero or more name/value pairs (or members). A name is a
67             # string. A single colon comes after each name, separating the name
68             # from the value. A single comma separates a value from a following
69             # name. The names within an object SHOULD be unique.
70             #-------------------------------------------------------------------------------
71              
72 82 100       395 if (/\G ( " \s* ) : /xmsgc) {
    100          
    100          
    100          
    100          
    100          
    50          
73 21         33 $parsed .= "$1,";
74 21         44 last;
75             }
76              
77             # end of JSON's string --> end of Perl's string
78             elsif (/\G (") /xmsgc) {
79 20         32 $parsed .= $1;
80 20         51 last;
81             }
82              
83             #-------------------------------------------------------------------------------
84             # UTF-16 surrogate pair
85             #-------------------------------------------------------------------------------
86             # To escape an extended character that is not in the Basic Multilingual
87             # Plane, the character is represented as a 12-character sequence,
88             # encoding the UTF-16 surrogate pair. So, for example, a string
89             # containing only the G clef character (U+1D11E) may be represented as
90             # "\uD834\uDD1E".
91             #
92             # TIPS: in Perl, \u in a "string" means ucfirst(), so must be \\u
93             # TIPS: Don't use /i modifier, because \U is not \u
94             #-------------------------------------------------------------------------------
95              
96             elsif (/\G \\u ([Dd][89ABab][0-9A-Fa-f][0-9A-Fa-f]) \\u ([Dd][CDEFcdef][0-9A-Fa-f][0-9A-Fa-f]) /xmsgc) {
97 2         6 my $high_surrogate = hex $1;
98 2         3 my $low_surrogate = hex $2;
99 2         6 my $unicode = 0x10000 + ($high_surrogate - 0xD800) * 0x400 + ($low_surrogate - 0xDC00);
100 2 50       6 if (0) { }
101 0         0 elsif ($unicode < 0x110000) { $parsed .= pack($U0.'C*', $unicode>>18|0xF0, $unicode>>12&0x3F|0x80, $unicode>>6&0x3F|0x80, $unicode&0x3F|0x80) }
  2         11  
102 0         0 else { confess qq{@{[__FILE__]}: \\u{$1} is out of Unicode (0x0000 to 0xFFFF)}; }
  0         0  
103             }
104              
105             #-------------------------------------------------------------------------------
106             # any BMP UTF-16 codepoint
107             #-------------------------------------------------------------------------------
108             # If the character is in the Basic Multilingual Plane (U+0000 through U+FFFF),
109             # then it may be represented as a six-character sequence: a reverse solidus,
110             # followed by the lowercase letter u, followed by four hexadecimal digits that
111             # encode the character's code point. The hexadecimal letters A through F can
112             # be uppercase or lowercase.
113             #-------------------------------------------------------------------------------
114              
115             elsif (/\G \\u ([0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]) /xmsgc) {
116 2         21 my $unicode = hex $1;
117 2 50       10 if (0) { }
    50          
    50          
118 0         0 elsif ($unicode < 0x80) { $parsed .= pack($U0.'C*', $unicode ) }
  0         0  
119 0         0 elsif ($unicode < 0x800) { $parsed .= pack($U0.'C*', $unicode>>6 |0xC0, $unicode&0x3F|0x80) }
120 2         14 elsif ($unicode < 0x10000) { $parsed .= pack($U0.'C*', $unicode>>12 |0xE0, $unicode>>6&0x3F|0x80, $unicode&0x3F|0x80) }
121 0         0 else { confess qq{@{[__FILE__]}: \\u{$1} is out of Unicode (0x0000 to 0xFFFF)}; }
  0         0  
122             }
123              
124             #-------------------------------------------------------------------------------
125             # two-character sequence escape representations
126             #-------------------------------------------------------------------------------
127             # Alternatively, there are two-character sequence escape representations
128             # of some popular characters. So, for example, a string containing only
129             # a single reverse solidus character may be represented more compactly
130             # as "\\".
131             #
132             # \" quotation mark U+0022
133             # \\ reverse solidus U+005C
134             # \/ solidus U+002F
135             # \b backspace U+0008
136             # \f form feed U+000C
137             # \n line feed U+000A
138             # \r carriage return U+000D
139             # \t tab U+0009
140             #-------------------------------------------------------------------------------
141              
142             elsif (m{\G (\\["\\/bfnrt]) }xmsgc) {
143 8         13 $parsed .= $1;
144             }
145              
146             # escape $ and @ to avoid interpolation on eval() of Perl
147             elsif (/\G ([\$\@]) /xmsgc) {
148 2         5 $parsed .= "\\$1";
149             }
150              
151             # other all UTF-8 codepoints
152             elsif (/\G ($utf8) /xmsgc) {
153 27         56 $parsed .= $1;
154             }
155              
156             # panic inside "string"
157             else {
158 0         0 confess sprintf(<
159 0         0 @{[__FILE__]}: JSON data makes panic; (maybe @{[__FILE__]} has bug(s))
  0         0  
160             %s
161             END
162             }
163             }
164             }
165              
166             # JSON's "null" --> Perl's "undef"
167             elsif (/\G null \b/xmsgc) {
168 1         5 $parsed .= 'undef';
169             }
170              
171             # JSON's boolean "true" --> Perl's "1"
172             elsif (/\G true \b/xmsgc) {
173 1         4 $parsed .= '!!1';
174             }
175              
176             # JSON's boolean "false" --> Perl's "0"
177             elsif (/\G false \b/xmsgc) {
178 1         4 $parsed .= '!!0';
179             }
180              
181             # other all UTF-8 codepoints
182             elsif (/\G ($utf8) /xmsgc) {
183 128         381 $parsed .= $1;
184             }
185              
186             # panic outside "string"
187             else {
188 0         0 confess sprintf(<
189 0         0 @{[__FILE__]}: JSON data makes panic; (maybe @{[__FILE__]} has bug(s))
  0         0  
190             %s
191             END
192             }
193             }
194              
195             # return as Perl data without UTF-8 flag
196 42         1783 return eval $parsed;
197             }
198              
199             1;
200              
201             __END__