File Coverage

blib/lib/JSON/Repair.pm
Criterion Covered Total %
statement 108 175 61.7
branch 45 106 42.4
condition 22 36 61.1
subroutine 10 11 90.9
pod 1 3 33.3
total 186 331 56.1


line stmt bran cond sub pod time code
1             package JSON::Repair;
2 1     1   68321 use parent Exporter;
  1         294  
  1         5  
3             our @EXPORT_OK = qw/repair_json/;
4             our %EXPORT_TAGS = (all => \@EXPORT_OK);
5 1     1   80 use warnings;
  1         2  
  1         26  
6 1     1   4 use strict;
  1         2  
  1         17  
7 1     1   4 use utf8;
  1         2  
  1         6  
8 1     1   20 use Carp;
  1         2  
  1         60  
9              
10             # This Perl version is required because of hashes as errors from
11             # JSON::Parse.
12              
13 1     1   20 use 5.014;
  1         3  
14 1     1   480 use JSON::Parse '0.49';
  1         978  
  1         130  
15 1     1   455 use C::Tokenize '$comment_re';
  1         4210  
  1         1732  
16             our $VERSION = '0.07';
17              
18             sub repair_json
19             {
20 9     9 1 19911 my ($broken, %options) = @_;
21 9         80 my $jp = JSON::Parse->new ();
22             # Request a hash response from $jp when there is an error.
23 9         31 $jp->diagnostics_hash (1);
24 9         18 my $verbose = $options{verbose};
25 9         11 my $output = $broken;
26 9         14 while (1) {
27             # Try various repairs. This continues until the JSON is
28             # valid, or none of the repairs have worked. After a
29             # successful repair, "next;" should be used. Falling through
30             # to the end of the while loop which started above causes an
31             # exit with an error message.
32 25         39 eval {
33 25         511 $jp->check ($output);
34             };
35 25 100       76 if (! $@) {
36 9         13 last;
37             }
38 16         29 my $error = $@->{error};
39             # print STDERR "$error\n";
40             # The type of thing where the error occurred
41 16         26 my $type = $@->{'bad type'};
42 16 100       41 if ($error eq 'Unexpected character') {
    100          
    50          
43 13         20 my $bad_byte = $@->{'bad byte contents'};
44             # $bad_byte is a number, so for convenient string
45             # comparison, turn it into a string.
46 13         25 my $bad_char = chr ($bad_byte);
47 13         21 my $valid_bytes = $@->{'valid bytes'};
48             # The position of the bad byte.
49 13         16 my $bad_pos = $@->{'bad byte position'};
50 13 50       28 if ($verbose) {
51 0         0 print "Unexpected character '$bad_char' at byte $bad_pos.\n";
52             }
53             # Everything leading up to the bad byte.
54 13         28 my $previous = substr ($output, 0, $bad_pos - 1);
55             # Everything after the bad byte.
56 13         34 my $remaining = substr ($output, $bad_pos);
57 13 100 66     68 if ($bad_char eq "'" && $valid_bytes->[ord ('"')]) {
    100 100        
58             # Substitute a ' in the remaining stuff, if there is
59             # one, up to a comma or colon or an end-of marker.
60 3 50       37 if ($remaining =~ s/^([^,:\]\}]*)'(\s*[,:\]\}])/$1"$2/) {
61 3         8 my $string = $1;
62 3 100       10 if ($string =~ /"/) {
63 1         3 my $quotedstring = $string;
64 1         3 $quotedstring =~ s/"/\\"/g;
65 1         19 $remaining =~ s/^\Q$string/$quotedstring/;
66             }
67             }
68 3         10 $output = $previous . '"' . $remaining;
69 3 50       6 if ($verbose) {
70 0         0 print "Changing single to double quote.\n";
71             }
72 3         8 next;
73             }
74             # An unexpected } or ] usually means there was a comma
75             # after an array or object entry, followed by the end
76             # of the object.
77             elsif ($bad_char eq '}' || $bad_char eq ']') {
78             # Look for a comma at the end of it.
79 2 50 0     14 if ($previous =~ /,\s*$/) {
    0          
80 2         11 $previous =~ s/,(\s*)$/$1/;
81 2         6 $output = $previous . $bad_char . $remaining;
82 2 50       5 if ($verbose) {
83 0         0 print "Removing a trailing comma.\n";
84             }
85 2         5 next;
86             }
87             elsif ($bad_char eq '}' && $previous =~ /:\s*$/) {
88             # In the unlikely event that there was a colon
89             # before the end of the object, add a "null"
90             # to it.
91 0         0 $output = $previous . "null" . $remaining;
92 0         0 next;
93             }
94             else {
95 0         0 warn "Unexpected } or ] in $type\n";
96             }
97             }
98 8 100 66     38 if (($type eq 'object' || $type eq 'array' ||
      100        
99             $type eq 'initial state')) {
100             # Handle comments in these states.
101 2 50       5 if ($bad_char eq '/') {
102 0 0       0 if ($verbose) {
103 0         0 print "C-style comments in object or array?\n";
104             }
105 0         0 $remaining = $bad_char . $remaining;
106 0 0       0 if ($remaining =~ s/^($comment_re)//) {
107 0 0       0 if ($verbose) {
108 0         0 print "Deleting comment '$1'.\n";
109             }
110 0         0 $output = $previous . $remaining;
111 0         0 next;
112             }
113             }
114 2 50       6 if ($bad_char eq '#') {
115 0 0       0 if ($verbose) {
116 0         0 print "Hash comments in object or array?\n";
117             }
118 0 0       0 if ($remaining =~ s/^(.*)\n//) {
119 0 0       0 if ($verbose) {
120 0         0 print "Deleting comment '$1'.\n";
121             }
122 0         0 $output = $previous . $remaining;
123 0         0 next;
124             }
125             }
126 2 100 66     21 if ($type eq 'initial state' && $previous !~ /^\s+$/) {
127 1 50       4 if ($verbose) {
128 0         0 print "Trailing garbage '$bad_char$remaining'?\n";
129             }
130 1         3 $output = $previous;
131 1         3 next;
132             }
133             }
134 7 50 66     23 if (($type eq 'object' || $type eq 'array') &&
      66        
135             $valid_bytes->[ord (',')]) {
136 0 0       0 if ($verbose) {
137 0         0 print "Missing comma in object or array?\n";
138             }
139             # Put any space at the end of $previous before the
140             # comma, for aesthetic reasons only.
141 0         0 my $join = ',';
142 0 0       0 if ($previous =~ s/(\s+)$//) {
143 0         0 $join .= $1;
144             }
145 0         0 $join .= $bad_char;
146 0         0 $output = $previous . $join . $remaining;
147 0         0 next;
148             }
149 7 0 33     14 if ($type eq 'object' && $valid_bytes->[ord ('"')]) {
150 0 0       0 if ($verbose) {
151 0         0 print "Unquoted key or value in object?\n";
152             }
153 0 0       0 if ($remaining =~ s/(^[^\}\]:,\n\r"]*)(\s*):/$1"$2:/) {
154 0 0       0 if ($verbose) {
155 0         0 print "Adding quotes to key '$bad_char$1'\n";
156             }
157 0         0 $output = $previous . '"' . $bad_char . $remaining;
158 0         0 next;
159             }
160 0 0       0 if ($previous =~ /:\s*$/) {
161 0         0 $remaining = $bad_char . $remaining;
162 0 0       0 if ($remaining =~ s/^(.*)\n/"$1"\n/) {
163 0 0       0 if ($verbose) {
164 0         0 print "Adding quotes to unquoted value '$1'.\n";
165 0         0 $output = $previous . $remaining;
166 0         0 next;
167             }
168             }
169             }
170             }
171 7 100       15 if ($type eq 'string') {
172 4 50       11 if ($bad_byte < 0x20) {
173 4         12 $bad_char = json_escape ($bad_char);
174 4 50       9 if ($verbose) {
175 0         0 print "Changing $bad_byte into $bad_char.\n";
176             }
177 4         9 $output = $previous . $bad_char . $remaining;
178 4         8 next;
179             }
180             }
181             # Add a zero to a fraction
182 3 100 66     14 if ($bad_char eq '.' && $remaining =~ /^[0-9]+/) {
183 1         3 $output = $previous . "0." . $remaining;
184 1         12 next;
185             }
186             # Delete a leading zero on a number.
187 2 50       5 if ($type eq 'number') {
188 2 100 66     13 if ($previous =~ /0$/ && $remaining =~ /^[0-9]+/) {
189 1 50       3 if ($verbose) {
190 0         0 print "Leading zero in number?\n";
191             }
192 1         4 $previous =~ s/0$//;
193 1         3 $remaining =~ s/^0+//;
194 1         3 $output = $previous . $bad_char . $remaining;
195             # print "$output\n";
196 1         2 next;
197             }
198 1 50 33     18 if ($bad_char =~ /[eE]/ && $previous =~ /\.$/) {
199 1 50       3 if ($verbose) {
200 0         0 print "Missing zero between . and e?\n";
201             }
202 1         4 $output = $previous . "0" . $bad_char . $remaining;
203 1         3 next;
204             }
205             }
206             # print "$output\n";
207 0         0 warn "Could not handle unexpected character '$bad_char' in $type\n";
208 0 0       0 if ($verbose) {
209 0         0 print_valid_bytes ($valid_bytes);
210             }
211             }
212             elsif ($error eq 'Unexpected end of input') {
213             # for my $k (keys %{$@}) {
214             # print "$k -> $@->{$k}\n";
215             # }
216             # print "Unexpected end of input.\n";
217 2 50       9 if ($type eq 'string') {
    100          
    50          
218 0         0 $output .= '"';
219 0 0       0 if ($verbose) {
220 0         0 print "String ended unexpectedly: adding a quote.\n";
221             }
222 0         0 next;
223             }
224             elsif ($type eq 'object') {
225 1         3 $output .= '}';
226 1 50       3 if ($verbose) {
227 0         0 print "Object ended unexpectedly: adding a }.\n";
228             }
229 1         2 next;
230             }
231             elsif ($type eq 'array') {
232 1         2 $output .= ']';
233 1 50       4 if ($verbose) {
234 0         0 print "Array ended unexpectedly: adding a ].\n";
235             }
236 1         2 next;
237             }
238             else {
239             # Cannot really get an unexpected end of a number
240             # since it has no end marker, nor of the initial
241             # state. That leaves the case of literals, which might
242             # come to an unexpected end like 'tru' or something.
243 0         0 warn "Unhandled unexpected end of input in $type";
244             }
245             }
246             elsif ($error eq 'Empty input') {
247 1         3 $output = '""';
248 1 50       2 if ($verbose) {
249 0         0 print "Changing empty input to empty string \"\".\n";
250             }
251 1         2 next;
252             }
253 0 0       0 if ($verbose) {
254 0         0 print "$output\n";
255             }
256 0         0 carp "Repair failed: unhandled error $error";
257 0         0 last;
258             }
259 9         56 return $output;
260             }
261              
262             sub print_valid_bytes
263             {
264 0     0 0 0 my ($valid_bytes) = @_;
265 0         0 for my $i (0..127) {
266 0         0 my $ok = $valid_bytes->[$i];
267 0 0       0 if ($ok) {
268 0         0 print "OK: '",chr ($i),"'\n";
269             }
270             }
271             }
272              
273             # Filched from JSON::Create::PP
274              
275             sub json_escape
276             {
277 4     4 0 7 my ($input) = @_;
278 4         16 $input =~ s/("|\\)/\\$1/g;
279 4         7 $input =~ s/\x08/\\b/g;
280 4         7 $input =~ s/\f/\\f/g;
281 4         6 $input =~ s/\n/\\n/g;
282 4         6 $input =~ s/\r/\\r/g;
283 4         8 $input =~ s/\t/\\t/g;
284 4         12 $input =~ s/([\x00-\x1f])/sprintf ("\\u%04x", ord ($1))/ge;
  1         7  
285 4         9 return $input;
286             }
287              
288             1;