File Coverage

blib/lib/JSON/Streaming/Reader.pm
Criterion Covered Total %
statement 309 362 85.3
branch 166 242 68.6
condition 57 81 70.3
subroutine 39 42 92.8
pod 9 16 56.2
total 580 743 78.0


line stmt bran cond sub pod time code
1              
2             =head1 NAME
3              
4             JSON::Streaming::Reader - Read JSON strings in a streaming manner
5              
6             =cut
7              
8             package JSON::Streaming::Reader;
9              
10 7     7   863 use strict;
  7         15  
  7         281  
11 7     7   41 use warnings;
  7         11  
  7         208  
12 7     7   38 use Carp;
  7         20  
  7         734  
13 7     7   8332 use IO::Scalar;
  7         130315  
  7         577  
14 7     7   5283 use JSON::Streaming::Reader::EventWrapper;
  7         22  
  7         429  
15              
16             our $VERSION = '0.06';
17              
18 7     7   44 use constant ROOT_STATE => {};
  7         63  
  7         608  
19              
20             # Make some constants for the token types
21             BEGIN {
22 7     7   17 foreach my $token_type (qw(start_object end_object start_array end_array start_property end_property add_string add_number add_boolean add_null error)) {
23 7     7   34 no strict 'refs';
  7         13  
  7         477  
24              
25 77         140 my $full_name = __PACKAGE__."::".uc($token_type);
26              
27 77     955   230 *{$full_name} = sub { $token_type };
  77         34674  
  955         3566  
28             }
29             };
30              
31             sub for_stream {
32 194     194 1 288 my ($class, $stream) = @_;
33              
34 194         457 my $self = bless {}, $class;
35 194         408 $self->{stream} = $stream;
36 194         287 $self->{state} = ROOT_STATE;
37 194         363 $self->{state_stack} = [];
38 194         355 $self->{used} = 0;
39 194         511 return $self;
40             }
41              
42             sub for_string {
43 129     129 1 368 my ($class, $value) = @_;
44              
45 129 100       952 my $stream = IO::Scalar->new(ref $value ? $value : \$value);
46 129         5366 return $class->for_stream($stream);
47             }
48              
49             sub event_based {
50 65     65 1 368 my ($class, %callbacks) = @_;
51              
52 65         454 my $fake_stream = JSON::Streaming::Reader::EventWrapper->new();
53 65         166 my $self = $class->for_stream($fake_stream);
54 65         125 $self->{event_callbacks} = \%callbacks;
55              
56 65         190 return $self;
57             }
58              
59             sub process_tokens {
60 67     67 1 353 my ($self, %callbacks) = @_;
61              
62 67         181 while (my $token = $self->get_token()) {
63 266         374 my $token_type = shift @$token;
64 266 50       649 my $callback = $callbacks{$token_type} or Carp::croak("No callback provided for $token_type tokens");
65 266         737 $callback->(@$token);
66             }
67             }
68              
69             sub get_token {
70 963     963 1 7431 my ($self) = @_;
71              
72 963 100       2443 return undef if $self->{errored};
73              
74 867         1028 my $tok = eval {
75 867         1587 my $need_comma = $self->made_value;
76 867         1017 while (1) { # Until we find a character that's interesting
77 956         1690 $self->_eat_whitespace();
78 894         1699 my $char = $self->_peek_char();
79              
80 894 100       1802 unless (defined($char)) {
81             # EOF
82 124 100       235 die("Unexpected end of input\n") unless $self->_state == ROOT_STATE;
83 116         267 return undef;
84             }
85              
86             # If we've found more stuff while we're in the root state and we've
87             # already seen stuff then there's junk at the end of the string.
88 770 100 100     1394 die("Unexpected junk at the end of input\n") if $self->_state == ROOT_STATE && $self->{used};
89              
90 750 100 100     2123 if ($char eq ',' && ! $self->done_comma) {
91 135 100 100     376 if ($self->in_array || $self->in_object) {
    50          
92 89 50       183 if ($self->made_value) {
93 89         170 $self->_require_char(',');
94              
95 89 100       177 if($self->is_event_based) {
96 23         40 my $stream = $self->{stream};
97 23         68 $stream->complete_reading;
98 23         102 $stream->begin_reading;
99             }
100              
101 89         184 $self->_set_done_comma();
102 89         173 next;
103             }
104             }
105             elsif ($self->in_property) {
106             # If we're in a property then a comma indicates
107             # the end of the property. We exit the property state
108             # but leave the comma so that the next get_token
109             # can still see it.
110 46 100       88 die("Property has no value\n") unless $self->made_value;
111 45         98 $self->_pop_state();
112 45         84 $self->_set_made_value;
113 45         77 return [ END_PROPERTY ];
114             }
115             }
116              
117 615 100 100     1926 if ($char ne '}' && $self->in_object) {
118             # If we're in an object then we must start a property here.
119 94         191 my $name_token = $self->_get_string_token();
120 88 50       196 die "Expected string\n" unless $name_token->[0] eq ADD_STRING;
121 88         187 $self->_eat_whitespace();
122 85         182 $self->_require_char(":");
123 83         126 my $property_name = $name_token->[1];
124 83         162 my $state = $self->_push_state();
125 83         121 $state->{in_property} = 1;
126 83         166 return [ START_PROPERTY, $property_name ];
127             }
128              
129 521 100       2588 if ($char eq '{') {
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
130 68 50       168 die("Unexpected start of object\n") unless $self->can_start_value;
131 68         169 $self->_require_char('{');
132 68         158 my $state = $self->_push_state();
133 68         137 $state->{in_object} = 1;
134 68         158 return [ START_OBJECT ];
135             }
136             elsif ($char eq '}') {
137 94 50       241 die("Expected another property\n") if $self->done_comma;
138              
139             # If we're in a property then this also indicates
140             # the end of the property.
141             # We don't actually consume the } here, so the next
142             # call to get_token will see it again but it will be
143             # in the is_object state rather than is_property.
144 94 100       196 if ($self->in_property) {
145 36 100       81 die("Property has no value\n") unless $self->made_value;
146 35         79 $self->_pop_state();
147 35         64 $self->_set_made_value;
148 35         76 return [ END_PROPERTY ];
149             }
150              
151 58 100       127 die("End of object without matching start\n") unless $self->in_object;
152 55         131 $self->_require_char('}');
153 55         126 $self->_pop_state();
154 55         148 $self->_set_made_value();
155 55         123 return [ END_OBJECT ];
156             }
157             elsif ($char eq '[') {
158 62 50       142 die("Unexpected start of array\n") unless $self->can_start_value;
159 62         172 $self->_require_char('[');
160 62         139 my $state = $self->_push_state();
161 62         100 $state->{in_array} = 1;
162 62         135 return [ START_ARRAY ];
163             }
164             elsif ($char eq ']') {
165 52 100       111 die("End of array without matching start\n") unless $self->in_array;
166 50 100       120 die("Expected another value\n") if $self->done_comma;
167 48         101 $self->_require_char(']');
168 48         113 $self->_pop_state();
169 48         140 $self->_set_made_value();
170 48         99 return [ END_ARRAY ];
171             }
172             elsif ($char eq '"') {
173 70 100       164 die("Unexpected string value\n") unless $self->can_start_value;
174 69 50 66     200 die("Expected ,\n") if $need_comma && ! $self->done_comma;
175 69         161 return $self->_get_string_token();
176             }
177             elsif ($char eq 't') {
178 13 50       35 die("Unexpected boolean value\n") unless $self->can_start_value;
179 13 50 66     51 die("Expected ,\n") if $need_comma && ! $self->done_comma;
180 13         36 foreach my $c (qw(t r u e)) {
181 52         105 $self->_require_char($c);
182             }
183 12         36 $self->_set_made_value();
184 12         32 return [ ADD_BOOLEAN, 1 ];
185             }
186             elsif ($char eq 'f') {
187 14 50       35 die("Unexpected boolean value\n") unless $self->can_start_value;
188 14 100 100     56 die("Expected ,\n") if $need_comma && ! $self->done_comma;
189 13         26 foreach my $c (qw(f a l s e)) {
190 55         104 $self->_require_char($c);
191             }
192 4         13 $self->_set_made_value();
193 4         11 return [ ADD_BOOLEAN, 0 ];
194             }
195             elsif ($char eq 'n') {
196 16 50       38 die("Unexpected null\n") unless $self->can_start_value;
197 16 50 66     57 die("Expected ,\n") if $need_comma && ! $self->done_comma;
198 16         33 foreach my $c (qw(n u l l)) {
199 64         119 $self->_require_char($c);
200             }
201 15         36 $self->_set_made_value();
202 15         33 return [ ADD_NULL ];
203             }
204             elsif ($char =~ /^[\d\-]/) {
205 124 50       272 die("Unexpected number value\n") unless $self->can_start_value;
206 124 100 100     354 die("Expected ,\n") if $need_comma && ! $self->done_comma;
207 122         280 return $self->_get_number_token();
208             }
209              
210 8         42 die "Unexpected character $char\n";
211 0         0 last;
212             }
213             };
214 867 100       2279 if ($@) {
215 166 100 66     675 unless (ref($@) && $@ == JSON::Streaming::Reader::EventWrapper::UNDERRUN()) {
216 78         138 $self->{errored} = 1;
217 78         113 my $error = $@;
218 78         127 chomp $error;
219 78         148 return [ ERROR, $error ];
220             }
221             else {
222             # If it's an underrun signal from our weird event-based IO wrapper,
223             # we pass it through to the caller.
224              
225 88         261 die $@;
226             }
227             }
228              
229 701         2180 return $tok;
230              
231             }
232              
233             sub skip {
234 0     0 1 0 my ($self) = @_;
235              
236 0 0       0 Carp::croak("Can't skip() on an event-based reader") if $self->is_event_based;
237              
238 0         0 my @end_chars;
239              
240 0 0       0 @end_chars = (',', '}') if $self->in_property;
241 0 0       0 @end_chars = qw(}) if $self->in_object;
242 0 0       0 @end_chars = qw(]) if $self->in_array;
243              
244 0         0 my $start_chars = 0;
245              
246 0         0 while (1) {
247 0         0 my $peek = $self->_peek_char();
248              
249 0 0       0 die "Unexpected end of input\n" unless defined($peek);
250              
251 0 0       0 if ($peek eq '"') {
252             # Use the normal string parser to skip over the
253             # string so that strings containing our end_chars don't
254             # cause us problems.
255 0         0 $self->_parse_string();
256 0         0 next;
257             }
258              
259 0 0 0     0 if ($start_chars < 1 && grep { $_ eq $peek } @end_chars) {
  0         0  
260 0 0 0     0 unless ($self->in_property && $peek eq '}') {
261 0         0 $self->_get_char();
262             }
263 0   0     0 my $skipped_value = $self->in_object || $self->in_array;
264 0         0 $self->_pop_state();
265 0 0       0 $self->_set_made_value() if $skipped_value;
266 0         0 return;
267             }
268             else {
269 0 0 0     0 $start_chars++ if $peek eq '[' || $peek eq '{';
270 0 0 0     0 $start_chars-- if $peek eq ']' || $peek eq '}';
271 0         0 $self->_get_char();
272             }
273             }
274              
275             }
276              
277             sub slurp {
278 6     6 1 27 my ($self) = @_;
279              
280 6 50       16 Carp::croak("Can't slurp() on an event-based reader") if $self->is_event_based;
281              
282 6         13 my $start_state = $self->_state;
283 6         10 my @items = ();
284 6         7 my $current_item = undef;
285              
286             my $push_item = sub {
287 7     7   19 my $item = shift;
288 7         9 push @items, $current_item;
289 7         32 $current_item = $item;
290 6         26 };
291             my $pop_item = sub {
292 13     13   14 $current_item = pop @items;
293 13         13 return $current_item;
294 6         17 };
295             my $handle_value = sub {
296 16     16   20 my ($token, $target) = @_;
297 16         19 my $type = $token->[0];
298              
299 16 100 100     22 if ($type eq ADD_STRING || $type eq ADD_NUMBER) {
    100          
    100          
    100          
    50          
300 8         32 $$target = $token->[1];
301             }
302             elsif ($type eq ADD_BOOLEAN) {
303 2 100       12 $$target = $token->[1] ? \1 : \0;
304             }
305             elsif ($type eq ADD_NULL) {
306 1         4 $$target = undef;
307             }
308             elsif ($type eq START_OBJECT) {
309 2         3 my $new_item = {};
310 2         4 $$target = $new_item;
311 2         3 $push_item->($new_item);
312             }
313             elsif ($type eq START_ARRAY) {
314 3         4 my $new_item = [];
315 3         5 $$target = $new_item;
316 3         7 $push_item->($new_item);
317             }
318             else {
319             # This should actually never happen, since it should be caught
320             # by the underlying raw API.
321 0         0 die "Expecting a value but got a $type token\n";
322             }
323 6         24 };
324              
325 6         8 my $need_deref = 0;
326 6 50       12 if ($self->in_array) {
    50          
    50          
327 0         0 $current_item = [];
328             }
329             elsif ($self->in_object) {
330 0         0 $current_item = {};
331             }
332             elsif ($self->in_property) {
333 6         7 my $value = undef;
334 6         8 $current_item = \$value;
335 6         8 $need_deref = 1;
336             }
337             else {
338 0         0 die "Can only slurp arrays, object or properties\n";
339             }
340 6         12 my $ret_item = $current_item;
341              
342 6         12 while (my $token = $self->get_token()) {
343 31         35 my $type = $token->[0];
344              
345 31 50       49 if ($type eq ERROR) {
346 0         0 die $token->[1];
347             }
348              
349 31         49 my $item_type = ref($current_item);
350              
351 31 100 100     112 if ($item_type eq 'SCALAR' || $item_type eq 'REF') {
    100          
    50          
352             # We're expecting a value
353              
354 16 100       24 if ($type eq END_PROPERTY) {
355 8         14 $pop_item->();
356 8 100       22 last unless defined($current_item);
357             }
358             else {
359 8         14 $handle_value->($token, $current_item);
360             }
361             }
362             elsif ($item_type eq 'ARRAY') {
363 11 100       19 if ($type eq END_ARRAY) {
364 3         6 $pop_item->();
365 3 50       12 last unless defined($current_item);
366             }
367             else {
368             # We're expecting a value here too, but
369             # we're going to add it to the end of the
370             # array instead.
371 8         10 my $target = \$current_item->[scalar(@$current_item)];
372 8         16 $handle_value->($token, $target);
373             }
374             }
375             elsif ($item_type eq 'HASH') {
376             # We're expecting a property here.
377              
378 4 100       6 if ($type eq START_PROPERTY) {
    50          
379 2         5 my $name = $token->[1];
380 2         6 my $target = \$current_item->{$name};
381 2         4 $push_item->($target);
382             }
383             elsif ($type eq END_OBJECT) {
384 2         4 $pop_item->();
385 2 50       7 last unless defined($current_item);
386             }
387             else {
388 0         0 die "Not expecting $type in object state\n";
389             }
390             }
391             else {
392 0         0 die "Don't know what to do with a $item_type value\n";
393             }
394              
395             }
396              
397             # There should be nothing in $current_item by this point.
398 6 50       14 die "Unexpected end of input" if defined($current_item);
399              
400 6 50       75 return $need_deref ? $$ret_item : $ret_item;
401             }
402              
403             sub signal_eof {
404 65     65 1 126 my ($self) = @_;
405              
406 65 50       129 Carp::croak("Can't signal_eof on a non-event-based JSON reader") unless $self->is_event_based;
407              
408 65         221 $self->{stream}->signal_eof();
409              
410             # Now feed the buffer with nothing to get it to process
411             # whatever we have left in the buffer.
412 65         87 my $empty = '';
413 65         142 $self->feed_buffer(\$empty);
414             }
415              
416             sub feed_buffer {
417 171     171 1 253 my ($self, $new_data) = @_;
418              
419 171 50       325 Carp::croak("Can't feed_buffer on a non-event-based JSON reader") unless $self->is_event_based;
420              
421 171         285 my $stream = $self->{stream};
422              
423 171         549 $stream->feed_buffer($new_data);
424              
425             # Retain the peek value so we can restore it if we roll back
426 171         249 my $old_peek = $self->{peeked};
427              
428             # Start a read transaction so we can roll back if there's a buffer underrun
429 171         427 $stream->begin_reading();
430              
431 171         229 my $callbacks = $self->{event_callbacks};
432              
433             # Now get our normal, blocking parsing code to try to read tokens until we underrun the buffer.
434 171         213 eval {
435 171         325 while (my $token = $self->get_token()) {
436 208         640 $stream->complete_reading();
437              
438 208         399 my $token_type = shift @$token;
439 208 50       611 my $callback = $callbacks->{$token_type} or Carp::croak("No callback provided for $token_type tokens");
440 208         575 $callback->(@$token);
441              
442             # Start a new transaction at the end of the last token.
443 208         354 my $old_peek = $self->{peeked};
444 208         538 $stream->begin_reading();
445             }
446             };
447 171 100       404 if ($@) {
448 88         115 my $err = $@;
449 88 50 33     401 if (ref($err) && $err == JSON::Streaming::Reader::EventWrapper::UNDERRUN()) {
450             # Roll back and try again when we get more data.
451 88         244 $stream->roll_back_reading();
452 88         131 $self->{peeked} = $old_peek;
453 88         330 return;
454             }
455             else {
456             # Some other kind of error. Re-throw.
457 0         0 die $err;
458             }
459             }
460             else {
461             # We hit EOF without an underrun, so we just need to clean up now.
462 83         229 $stream->complete_reading();
463 83 50       215 my $callback = $callbacks->{eof} or Carp::croak("No callback provided for eof");
464 83         205 $callback->();
465             }
466             }
467              
468             sub _get_char {
469 2313     2313   2939 my ($self) = @_;
470              
471 2313         3794 my $ret = $self->_peek_char();
472 2306         3410 $self->{peeked} = undef;
473 2306         4194 return $ret;
474             }
475              
476             sub _require_char {
477 741     741   1045 my ($self, $required) = @_;
478              
479 741         1376 my $char = $self->_get_char();
480 738 100 100     4400 unless (defined($char) && $char eq $required) {
481 14 100       111 die "Expected $required but encountered ".(defined($char) ? $char : 'EOF')."\n";
482             }
483 724         1171 return $char;
484             }
485              
486             sub _peek_char {
487 5823     5823   6658 my ($self) = @_;
488              
489 5823 100       18521 return $self->{peeked} if defined($self->{peeked});
490              
491 2743         3132 my $buf = "";
492 2743         8255 my $success = $self->{stream}->read($buf, 1);
493              
494 2655 100       24457 unless ($success) {
495             # Assume EOF
496 314         759 return undef;
497             }
498              
499 2341         6488 return $self->{peeked} = $buf;
500             }
501              
502             sub _eat_whitespace {
503 1044     1044   1180 my ($self) = @_;
504              
505 1044         1071 while (1) {
506 1406         2264 my $char = $self->_peek_char();
507              
508 1341 100       2897 return if ! defined($char);
509 1217 100       8691 return if $char !~ /^\s/;
510 362         590 $self->_get_char();
511             }
512             }
513              
514             sub _get_digits {
515 150     150   260 my ($self) = @_;
516              
517 150         206 my $accum = "";
518              
519 150   100     265 while (defined($self->_peek_char()) && $self->_peek_char() =~ /\d/) {
520 212         476 $accum .= $self->_get_char();
521             }
522              
523             # We should have got at least one digit
524 134 100 66     666 die "Expected digits but got ".(defined $self->_peek_char() ? $self->_peek_char() : 'EOF')."\n" unless defined($accum) && $accum ne '';
    100          
525              
526 124         283 return $accum;
527             }
528              
529             sub _get_number_token {
530 122     122   153 my ($self) = @_;
531              
532 122         180 my @accum = ();
533              
534 122 100       240 if ($self->_peek_char() eq '-') {
535 9         25 push @accum, $self->_get_char();
536             }
537              
538 122         278 push @accum, $self->_get_digits;
539              
540 105 100 100     213 if (defined($self->_peek_char()) && $self->_peek_char() eq '.') {
541 18         44 push @accum, $self->_get_char();
542              
543 18         43 push @accum, $self->_get_digits;
544              
545             }
546              
547 98 100 100     285 if (defined($self->_peek_char()) && $self->_peek_char() =~ /e/i) {
548 10         28 push @accum, $self->_get_char();
549              
550 10         57 my $peek = $self->_peek_char();
551              
552 10 100 100     82 if (defined($peek) && ($peek eq '+' || $peek eq '-')) {
      66        
553 4         12 push @accum, $self->_get_char();
554             }
555              
556 10         26 push @accum, $self->_get_digits;
557             }
558              
559 96         236 $self->_set_made_value();
560              
561             # Join and convert to number
562             # Perl's numberification conveniently does what we need here.
563 96         192 return [ ADD_NUMBER, join('', @accum)+0 ];
564             }
565              
566             my %escape_chars = (
567             b => "\b",
568             f => "\f",
569             n => "\n",
570             r => "\r",
571             t => "\t",
572             "\\" => "\\",
573             "/" => "/",
574             '"' => '"',
575             );
576              
577             sub _get_string_token {
578 163     163   205 my ($self) = @_;
579              
580 163         536 $self->_require_char('"');
581              
582 159         201 my $accum = "";
583              
584 159         189 while (1) {
585 952         1723 my $char = $self->_get_char();
586              
587 948 100       2012 die "Unterminated string\n" unless defined($char);
588 944 100       1982 if ($char eq '"') {
589 150         207 last;
590             }
591              
592 794 100       1295 if ($char eq "\\") {
593 5         13 my $escape_char = $self->_get_char();
594              
595 5 50       13 die "Unfinished escape sequence\n" unless defined($escape_char);
596              
597 5 100       20 if (my $replacement = $escape_chars{$escape_char}) {
    50          
598 4         7 $accum .= $replacement;
599             }
600             elsif ($escape_char eq 'u') {
601             # TODO: Support this
602 0         0 die "\\u sequence not yet supported\n";
603             }
604             else {
605 1         5 die "Invalid escape sequence \\$escape_char\n";
606             }
607             }
608             else {
609 789         1084 $accum .= $char;
610             }
611             }
612              
613 150         293 $self->_set_made_value();
614              
615 150         274 return [ ADD_STRING, $accum ];
616             }
617              
618             sub _parse_string {
619 0     0   0 my ($self) = @_;
620              
621 0         0 $self->_require_char('"');
622              
623 0         0 my $accum = "";
624              
625             # Don't bother building the result buffer if we're called in void context
626 0         0 my $want_result = defined(wantarray());
627              
628 0         0 while (1) {
629 0         0 my $char = $self->_get_char();
630              
631 0 0       0 die "Unterminated string\n" unless defined($char);
632 0 0       0 if ($char eq '"') {
633 0         0 last;
634             }
635              
636 0 0       0 if ($char eq "\\") {
637 0         0 my $escape_char = $self->_get_char();
638              
639 0 0       0 die "Unfinished escape sequence\n" unless defined($escape_char);
640              
641 0 0       0 if (my $replacement = $escape_chars{$escape_char}) {
    0          
642 0 0       0 $accum .= $replacement if $want_result;
643             }
644             elsif ($escape_char eq 'u') {
645             # TODO: Support this
646 0         0 die "\\u sequence not yet supported\n";
647             }
648             else {
649 0         0 die "Invalid escape sequence \\$escape_char";
650             }
651             }
652             else {
653 0 0       0 $accum .= $char if $want_result;
654             }
655             }
656              
657 0         0 return $accum;
658             }
659              
660             sub _push_state {
661 213     213   268 my ($self) = @_;
662              
663 213 50 66     372 Carp::croak("Can't add anything else: JSON output is complete") if $self->_state == ROOT_STATE && $self->{used};
664              
665 213         340 $self->{used} = 1;
666              
667 213         226 push @{$self->{state_stack}}, $self->{state};
  213         455  
668              
669 213         859 $self->{state} = {
670             in_object => 0,
671             in_array => 0,
672             in_property => 0,
673             made_value => 0,
674             };
675              
676 213         410 return $self->{state};
677             }
678              
679             sub _pop_state {
680 183     183   226 my ($self) = @_;
681              
682 183         192 my $state = pop @{$self->{state_stack}};
  183         340  
683 183         361 return $self->{state} = $state;
684             }
685              
686             sub _state {
687 6078     6078   6857 my ($self) = @_;
688              
689 6078         22361 return $self->{state};
690             }
691              
692             sub in_object {
693 1042 100   1042 0 1881 return $_[0]->_state->{in_object} ? 1 : 0;
694             }
695              
696             sub in_array {
697 193 100   193 0 382 return $_[0]->_state->{in_array} ? 1 : 0;
698             }
699              
700             sub in_property {
701 513 100   513 0 967 return $_[0]->_state->{in_property} ? 1 : 0;
702             }
703              
704             sub made_value {
705 1121 100   1121 0 2182 return $_[0]->_state->{made_value} ? 1 : 0;
706             }
707              
708             sub done_comma {
709 324 100   324 0 622 return $_[0]->_state->{done_comma} ? 1 : 0;
710             }
711              
712             sub _set_made_value {
713 460 100   460   907 $_[0]->_state->{made_value} = 1 unless $_[0]->_state == ROOT_STATE;
714 460 100       969 $_[0]->_state->{done_comma} = 0 unless $_[0]->_state == ROOT_STATE;
715 460         880 $_[0]->{used} = 1;
716             }
717              
718             sub _set_done_comma {
719 89 50   89   180 $_[0]->_state->{done_comma} = 1 unless $_[0]->_state == ROOT_STATE;
720             }
721              
722             sub can_start_value {
723              
724 367 100 100 367 0 725 return 0 if $_[0]->in_property && $_[0]->made_value;
725              
726 366 50       758 return $_[0]->in_object ? 0 : 1;
727             }
728              
729             sub _expecting_property {
730 0 0   0   0 return $_[0]->in_object ? 1 : 0;
731             }
732              
733             sub is_event_based {
734 331     331 0 1114 return defined($_[0]->{event_callbacks});
735             }
736              
737             1;
738              
739             =head1 DESCRIPTION
740              
741             This module is effectively a tokenizer for JSON strings. With it you can process
742             JSON strings in customizable ways without first creating a Perl data structure
743             from the data. For some applications, such as those where the expected data
744             structure is known ahead of time, this may be a more efficient way to process
745             incoming data.
746              
747             =head1 SYNOPSIS
748              
749             my $jsonr = JSON::Streaming::Reader->for_stream($fh);
750             $jsonr->process_tokens(
751             start_object => sub {
752             ...
753             },
754             end_object => sub {
755              
756             },
757             start_property => sub {
758             my ($name) = @_;
759             },
760             # ...
761             );
762              
763             =head1 CREATING A NEW INSTANCE
764              
765             This module can operate on either an L instance or a string.
766              
767             =head2 JSON::Streaming::Reader->for_stream($fh)
768              
769             Create a new instance that will read from the provided L instance.
770             If you want to operate on a raw Perl filehandle, you currently must wrap it up in
771             an IO::Handle instance yourself.
772              
773             =head2 JSON::Streaming::Reader->for_string(\$string)
774              
775             Create a new instance that will read from the provided string. Uses L
776             to make a stream-like wrapper around the string, and passes it into C.
777              
778             =head1 CALLBACK API
779              
780             The recommended way to use this library is via the callback-based API. In this
781             API you make a single method call on the reader object and pass it a CODE ref
782             for each token type. The reader object will then consume the entire stream
783             and call the callback responding to the type of each token it encounters.
784              
785             An error token will be raised if an error is encountered during parsing.
786              
787             For tokens that themselves have data, the data items will be passed in as arguments
788             to the callback.
789              
790             The handlers for the C, C and C tokens
791             may use the C method from the pull API, as described below, to avoid processing
792             the remainder of the corresponding container.
793              
794             =head2 $jsonr->process_tokens(%callbacks)
795              
796             Read the whole stream and call a callback corresponding to each token encountered.
797              
798             =head1 PULL API
799              
800             A lower-level API is provided that allows the caller to pull single tokens
801             from the stream as necessary. The callback API is implemented in terms of the
802             pull API.
803              
804             =head2 $jsonr->get_token()
805              
806             Get the next token from the stream and advance. If the end of the stream is reached, this
807             will return C. Otherwise it returns an ARRAY ref whose first member is the
808             token type and its subsequent members are the token type's data items, if any.
809              
810             =head2 $jsonr->skip()
811              
812             Quickly skip to the end of the current container. This can be used after a C,
813             C or C token is retrieved to signal that the remainder of the
814             container is not actually required. The next call to get_token will return the token
815             that comes after the corresponding C token for the current container. The corresponding
816             C token is never returned.
817              
818             This is most useful for skipping over unrecognised properties when populating a known
819             data structure.
820              
821             It is better to use this method than to implement skipping in the caller because skipping
822             is done using a lightweight mechanism that does not need to allocate additional memory
823             for tokens encountered during skipping. However, since this method uses a simpler
824             state model it may cause less-intuitive error messages to be raised if there is a
825             JSON syntax error within the content that is skipped.
826              
827             Note that errors encountered during skip are actually raised via C rather than
828             via the return value as with C.
829              
830             =head2 $jsonr->slurp()
831              
832             Skip to the end of the current container, capturing its value.
833             This allows you to handle a C,
834             C or C token as if it were an C-type token,
835             dealing with its entire contents in one go.
836              
837             The next call to get_token will return the token
838             that comes after the corresponding C token for the current container. The corresponding
839             C token is never returned.
840              
841             The return value of this method call will be a Perl data structure
842             representing the data that was skipped. This uses the same mappings as other
843             popular Perl JSON libraries: objects become hashrefs, arrays become arrayrefs,
844             strings and integers become scalars, boolean values become references to either
845             1 or 0, and null becomes undef.
846              
847             This is useful if there is a part of the tree that you would rather handle
848             via an in-memory data structure like you'd get from a non-streaming JSON parser.
849             It allows you to mix-and-match streaming parsing and one-shot parsing
850             within a single data stream.
851              
852             Note that errors encountered during skip are actually raised via C rather than
853             via the return value as with C.
854              
855             If you call this when in property state it will return the value of the property
856             and parsing will continue after the corresponding C. In object or
857             array state it will return the object or array and continue after the corresponding
858             C or C.
859              
860             =head1 EVENT-BASED API
861              
862             This module has an experimental event-based API which can be used to
863             do streaming JSON processing in event-driven applications or those
864             which do non-blocking I/O.
865              
866             In event-based mode it is the caller's responsibility to obtain data and
867             when data is available provide it to the reader for processing. When
868             enough data is available to unambigously represent a complete, atomic token
869             a callback function is called in a similar fashion to the callback-based API
870             described above.
871              
872             The event-based API implementation is currently somewhat hacky and
873             inefficient. Caution is advised when making use of it in production
874             applications, since it is currently merely a shim over the existing
875             blocking API which may introduce strange packet-boundary bugs
876             and other misbehavior.
877              
878             =head2 JSON::Streaming::Reader->event_based(%callbacks)
879              
880             Creates and returns an event-based reader. Callbacks are provided in the same way
881             as to the C method in the callback-based API, though
882             here there is an additional pseudo-token type called 'eof' which
883             signals that the end of the stream has been reached.
884              
885             Note that at present it is not possible to use the C or C methods
886             on an event-based reader, since their implementations expect
887             to be able to block. This ought to be fixed in a future version.
888              
889             =head2 $jsonr->feed_buffer(\$data)
890              
891             The caller must call this method whenever new data becomes available
892             for processing. A call to this method causes the reader to append
893             the supplied data to any existing buffer and then try to consume as
894             many tokens as possible from the buffer before returning. A callback
895             will be run for each complete token encountered in the buffer.
896              
897             If the additional data does not allow a complete token to be recognised,
898             the reader will retain the leftover buffer and attempt parsing again
899             at the next call to C.
900              
901             In most cases this method will be called in response to some event,
902             such as a notification that more data is available to read on a socket.
903              
904             =head2 $jsonr->signal_eof()
905              
906             The caller must call this method to signal the end of the data stream.
907             This will cause the parser to process any remaining bytes in the buffer,
908             possibly running token callbacks in the process, and then call the
909             special eof callback.
910              
911             In most cases this method will be called in response to some event,
912             such as a notification that a socket stream has been closed.
913              
914             =head1 TOKEN TYPES
915              
916             There are two major classes of token types. Bracketing tokens enclose other tokens
917             and come in pairs, named with C and C prefixes. Leaf tokens stand alone
918             and have C prefixes.
919              
920             For convenience the token type names match the method names used in the "raw" API
921             of L, so it is straightforward to implement a streaming JSON
922             normalizer by feeding the output from this module into the corresponding methods on that module.
923             However, this module does have an additional special token type 'error' which is used
924             to indicate tokenizing errors and does not have a corresponding method on the writer.
925              
926             =head2 start_object, end_object
927              
928             These token types delimit a JSON object. In a valid JSON stream an object will contain
929             only properties as direct children, which will result in start_property and end_property tokens.
930              
931             =head2 start_array, end_array
932              
933             These token types delimit a JSON array. In a valid JSON stream an object will contain
934             only values as direct children, which will result in one of the value token types described
935             below.
936              
937             =head2 start_property($name), end_property
938              
939             These token types delimit a JSON property. The name of the property is given as an argument.
940             In a valid JSON stream a start_property token will always be followed by one of the value
941             token types which will itself be immediately followed by an end_property token.
942              
943             =head2 add_string($value)
944              
945             Represents a JSON string. The value of the string is passed as an argument.
946              
947             =head2 add_number($value)
948              
949             Represents a JSON number. The value of the number is passed as an argument.
950              
951             =head2 add_boolean($value)
952              
953             Represents a JSON boolean. If it's C then 1 is passed as an argument, or if C 0 is passed.
954              
955             =head2 add_null
956              
957             Represents a JSON null.
958              
959             =head2 error($string)
960              
961             Indicates a tokenization error. A human-readable description of the error is included in $string.
962              
963             =head1 STREAM BUFFERING
964              
965             Except in event-based mode, this module doesn't do any buffering.
966             It expects the underlying stream to do appropriate read buffering
967             if necessary.
968              
969             In event-based mode an internal buffer is used which retains
970             bytes that are not yet enough to unambiguously represent
971             a complete token so that it can retry when more data is available.
972             In this situation it is up to the caller to read from its
973             data source in an appropriate manner, but it is best to provide
974             as much data as possible in a single data notification.
975              
976             =head1 LICENSE
977              
978             Copyright 2009 Martin Atkins .
979              
980             This program is free software; you can redistribute it and/or modify it under
981             the same terms as Perl itself.