File Coverage

blib/lib/JSON/Repair.pm
Criterion Covered Total %
statement 109 176 61.9
branch 45 106 42.4
condition 22 36 61.1
subroutine 10 11 90.9
pod 1 3 33.3
total 187 332 56.3


line stmt bran cond sub pod time code
1             package JSON::Repair;
2 1     1   21846 use parent Exporter;
  1         293  
  1         7  
3             our @EXPORT_OK = qw/repair_json/;
4             our %EXPORT_TAGS = (all => \@EXPORT_OK);
5 1     1   92 use warnings;
  1         3  
  1         34  
6 1     1   4 use strict;
  1         4  
  1         15  
7 1     1   3 use utf8;
  1         0  
  1         7  
8 1     1   14 use Carp;
  1         1  
  1         64  
9              
10             # This Perl version is required because of hashes as errors from
11             # JSON::Parse.
12              
13 1     1   22 use 5.014;
  1         2  
14 1     1   521 use JSON::Parse '0.49';
  1         1050  
  1         105  
15 1     1   616 use C::Tokenize '$comment_re';
  1         3735  
  1         1697  
16             our $VERSION = '0.06';
17              
18             sub repair_json
19             {
20 9     9 1 11991 my ($broken, %options) = @_;
21 9         49 my $jp = JSON::Parse->new ();
22             # Request a hash response from $jp when there is an error.
23 9         18 $jp->diagnostics_hash (1);
24 9         11 my $verbose = $options{verbose};
25 9         8 my $output = $broken;
26 9         8 while (1) {
27             # Try various repairs. This continues until the JSON is
28             # valid, or none of the repairs have worked. After a
29             # successful repair, "next;" should be used. Falling through
30             # to the end of the while loop which started above causes an
31             # exit with an error message.
32 25         22 eval {
33 25         387 $jp->check ($output);
34             };
35 25 100       46 if (! $@) {
36 9         7 last;
37             }
38 16         21 my $error = $@->{error};
39             # print STDERR "$error\n";
40             # The type of thing where the error occurred
41 16         15 my $type = $@->{'bad type'};
42 16 100       32 if ($error eq 'Unexpected character') {
    100          
    50          
43 13         13 my $bad_byte = $@->{'bad byte contents'};
44             # $bad_byte is a number, so for convenient string
45             # comparison, turn it into a string.
46 13         16 my $bad_char = chr ($bad_byte);
47 13         8 my $valid_bytes = $@->{'valid bytes'};
48             # The position of the bad byte.
49 13         20 my $bad_pos = $@->{'bad byte position'};
50 13 50       24 if ($verbose) {
51 0         0 print "Unexpected character '$bad_char' at byte $bad_pos.\n";
52             }
53             # Everything leading up to the bad byte.
54 13         21 my $previous = substr ($output, 0, $bad_pos - 1);
55             # Everything after the bad byte.
56 13         14 my $remaining = substr ($output, $bad_pos);
57 13 100 66     63 if ($bad_char eq "'" && $valid_bytes->[ord ('"')]) {
    100 100        
58 3         6 my $string;
59             # Substitute a ' in the remaining stuff, if there is
60             # one, up to a comma or colon or an end-of marker.
61 3 50       34 if ($remaining =~ s/^([^,:\]\}]*)'(\s*[,:\]\}])/$1"$2/) {
62 3         4 my $string = $1;
63 3 100       8 if ($string =~ /"/) {
64 1         2 my $quotedstring = $string;
65 1         3 $quotedstring =~ s/"/\\"/g;
66 1         13 $remaining =~ s/^\Q$string/$quotedstring/;
67             }
68             }
69 3         9 $output = $previous . '"' . $remaining;
70 3 50       5 if ($verbose) {
71 0         0 print "Changing single to double quote.\n";
72             }
73 3         7 next;
74             }
75             # An unexpected } or ] usually means there was a comma
76             # after an array or object entry, followed by the end
77             # of the object.
78             elsif ($bad_char eq '}' || $bad_char eq ']') {
79             # Look for a comma at the end of it.
80 2 50 0     11 if ($previous =~ /,\s*$/) {
    0          
81 2         10 $previous =~ s/,(\s*)$/$1/;
82 2         3 $output = $previous . $bad_char . $remaining;
83 2 50       4 if ($verbose) {
84 0         0 print "Removing a trailing comma.\n";
85             }
86 2         4 next;
87             }
88             elsif ($bad_char eq '}' && $previous =~ /:\s*$/) {
89             # In the unlikely event that there was a colon
90             # before the end of the object, add a "null"
91             # to it.
92 0         0 $output = $previous . "null" . $remaining;
93 0         0 next;
94             }
95             else {
96 0         0 warn "Unexpected } or ] in $type\n";
97             }
98             }
99 8 100 66     37 if (($type eq 'object' || $type eq 'array' ||
      100        
100             $type eq 'initial state')) {
101             # Handle comments in these states.
102 2 50       5 if ($bad_char eq '/') {
103 0 0       0 if ($verbose) {
104 0         0 print "C-style comments in object or array?\n";
105             }
106 0         0 $remaining = $bad_char . $remaining;
107 0 0       0 if ($remaining =~ s/^($comment_re)//) {
108 0 0       0 if ($verbose) {
109 0         0 print "Deleting comment '$1'.\n";
110             }
111 0         0 $output = $previous . $remaining;
112 0         0 next;
113             }
114             }
115 2 50       3 if ($bad_char eq '#') {
116 0 0       0 if ($verbose) {
117 0         0 print "Hash comments in object or array?\n";
118             }
119 0 0       0 if ($remaining =~ s/^(.*)\n//) {
120 0 0       0 if ($verbose) {
121 0         0 print "Deleting comment '$1'.\n";
122             }
123 0         0 $output = $previous . $remaining;
124 0         0 next;
125             }
126             }
127 2 100 66     9 if ($type eq 'initial state' && $previous !~ /^\s+$/) {
128 1 50       2 if ($verbose) {
129 0         0 print "Trailing garbage '$bad_char$remaining'?\n";
130             }
131 1         2 $output = $previous;
132 1         2 next;
133             }
134             }
135 7 50 66     28 if (($type eq 'object' || $type eq 'array') &&
      66        
136             $valid_bytes->[ord (',')]) {
137 0 0       0 if ($verbose) {
138 0         0 print "Missing comma in object or array?\n";
139             }
140             # Put any space at the end of $previous before the
141             # comma, for aesthetic reasons only.
142 0         0 my $join = ',';
143 0 0       0 if ($previous =~ s/(\s+)$//) {
144 0         0 $join .= $1;
145             }
146 0         0 $join .= $bad_char;
147 0         0 $output = $previous . $join . $remaining;
148 0         0 next;
149             }
150 7 0 33     10 if ($type eq 'object' && $valid_bytes->[ord ('"')]) {
151 0 0       0 if ($verbose) {
152 0         0 print "Unquoted key or value in object?\n";
153             }
154 0 0       0 if ($remaining =~ s/(^[^\}\]:,\n\r"]*)(\s*):/$1"$2:/) {
155 0 0       0 if ($verbose) {
156 0         0 print "Adding quotes to key '$bad_char$1'\n";
157             }
158 0         0 $output = $previous . '"' . $bad_char . $remaining;
159 0         0 next;
160             }
161 0 0       0 if ($previous =~ /:\s*$/) {
162 0         0 $remaining = $bad_char . $remaining;
163 0 0       0 if ($remaining =~ s/^(.*)\n/"$1"\n/) {
164 0 0       0 if ($verbose) {
165 0         0 print "Adding quotes to unquoted value '$1'.\n";
166 0         0 $output = $previous . $remaining;
167 0         0 next;
168             }
169             }
170             }
171             }
172 7 100       10 if ($type eq 'string') {
173 4 50       6 if ($bad_byte < 0x20) {
174 4         6 $bad_char = json_escape ($bad_char);
175 4 50       8 if ($verbose) {
176 0         0 print "Changing $bad_byte into $bad_char.\n";
177             }
178 4         7 $output = $previous . $bad_char . $remaining;
179 4         5 next;
180             }
181             }
182             # Add a zero to a fraction
183 3 100 66     10 if ($bad_char eq '.' && $remaining =~ /^[0-9]+/) {
184 1         3 $output = $previous . "0." . $remaining;
185 1         1 next;
186             }
187             # Delete a leading zero on a number.
188 2 50       8 if ($type eq 'number') {
189 2 100 66     10 if ($previous =~ /0$/ && $remaining =~ /^[0-9]+/) {
190 1 50       3 if ($verbose) {
191 0         0 print "Leading zero in number?\n";
192             }
193 1         4 $previous =~ s/0$//;
194 1         2 $remaining =~ s/^0+//;
195 1         2 $output = $previous . $bad_char . $remaining;
196             # print "$output\n";
197 1         2 next;
198             }
199 1 50 33     7 if ($bad_char =~ /[eE]/ && $previous =~ /\.$/) {
200 1 50       5 if ($verbose) {
201 0         0 print "Missing zero between . and e?\n";
202             }
203 1         3 $output = $previous . "0" . $bad_char . $remaining;
204 1         2 next;
205             }
206             }
207             # print "$output\n";
208 0         0 warn "Could not handle unexpected character '$bad_char' in $type\n";
209 0 0       0 if ($verbose) {
210 0         0 print_valid_bytes ($valid_bytes);
211             }
212             }
213             elsif ($error eq 'Unexpected end of input') {
214             # for my $k (keys %{$@}) {
215             # print "$k -> $@->{$k}\n";
216             # }
217             # print "Unexpected end of input.\n";
218 2 50       9 if ($type eq 'string') {
    100          
    50          
219 0         0 $output .= '"';
220 0 0       0 if ($verbose) {
221 0         0 print "String ended unexpectedly: adding a quote.\n";
222             }
223 0         0 next;
224             }
225             elsif ($type eq 'object') {
226 1         3 $output .= '}';
227 1 50       4 if ($verbose) {
228 0         0 print "Object ended unexpectedly: adding a }.\n";
229             }
230 1         2 next;
231             }
232             elsif ($type eq 'array') {
233 1         2 $output .= ']';
234 1 50       14 if ($verbose) {
235 0         0 print "Array ended unexpectedly: adding a ].\n";
236             }
237 1         2 next;
238             }
239             else {
240             # Cannot really get an unexpected end of a number
241             # since it has no end marker, nor of the initial
242             # state. That leaves the case of literals, which might
243             # come to an unexpected end like 'tru' or something.
244 0         0 warn "Unhandled unexpected end of input in $type";
245             }
246             }
247             elsif ($error eq 'Empty input') {
248 1         1 $output = '""';
249 1 50       2 if ($verbose) {
250 0         0 print "Changing empty input to empty string \"\".\n";
251             }
252 1         2 next;
253             }
254 0 0       0 if ($verbose) {
255 0         0 print "$output\n";
256             }
257 0         0 carp "Repair failed: unhandled error $error";
258 0         0 last;
259             }
260 9         33 return $output;
261             }
262              
263             sub print_valid_bytes
264             {
265 0     0 0 0 my ($valid_bytes) = @_;
266 0         0 for my $i (0..127) {
267 0         0 my $ok = $valid_bytes->[$i];
268 0 0       0 if ($ok) {
269 0         0 print "OK: '",chr ($i),"'\n";
270             }
271             }
272             }
273              
274             # Filched from JSON::Create::PP
275              
276             sub json_escape
277             {
278 4     4 0 6 my ($input) = @_;
279 4         11 $input =~ s/("|\\)/\\$1/g;
280 4         5 $input =~ s/\x08/\\b/g;
281 4         2 $input =~ s/\f/\\f/g;
282 4         5 $input =~ s/\n/\\n/g;
283 4         4 $input =~ s/\r/\\r/g;
284 4         5 $input =~ s/\t/\\t/g;
285 4         7 $input =~ s/([\x00-\x1f])/sprintf ("\\u%04x", ord ($1))/ge;
  1         5  
286 4         10 return $input;
287             }
288              
289             1;