File Coverage

blib/lib/YAML/PP/Lexer.pm
Criterion Covered Total %
statement 445 554 80.3
branch 192 260 73.8
condition 37 44 84.0
subroutine 35 36 97.2
pod 0 24 0.0
total 709 918 77.2


line stmt bran cond sub pod time code
1 42     42   281 use strict;
  42         81  
  42         1234  
2 42     42   212 use warnings;
  42         95  
  42         2709  
3             package YAML::PP::Lexer;
4              
5             our $VERSION = '0.036'; # VERSION
6              
7 42 50   42   288 use constant TRACE => $ENV{YAML_PP_TRACE} ? 1 : 0;
  42         87  
  42         3573  
8 42 100 66 42   323 use constant DEBUG => ($ENV{YAML_PP_DEBUG} || $ENV{YAML_PP_TRACE}) ? 1 : 0;
  42         84  
  42         2996  
9              
10 42     42   24401 use YAML::PP::Grammar qw/ $GRAMMAR /;
  42         264  
  42         5446  
11 42     42   389 use Carp qw/ croak /;
  42         94  
  42         284612  
12              
13             sub new {
14 459     459 0 1501 my ($class, %args) = @_;
15             my $self = bless {
16             reader => $args{reader},
17 459         1350 }, $class;
18 459         1288 $self->init;
19 459         1617 return $self;
20             }
21              
22             sub init {
23 3913     3913 0 5979 my ($self) = @_;
24 3913         6673 $self->{next_tokens} = [];
25 3913         5870 $self->{next_line} = undef;
26 3913         5587 $self->{line} = 0;
27 3913         5278 $self->{offset} = 0;
28 3913         6857 $self->{flowcontext} = 0;
29             }
30              
31 15283     15283 0 24495 sub next_line { return $_[0]->{next_line} }
32 14833     14833 0 24361 sub set_next_line { $_[0]->{next_line} = $_[1] }
33 8834     8834 0 22988 sub reader { return $_[0]->{reader} }
34 1727     1727 0 5981 sub set_reader { $_[0]->{reader} = $_[1] }
35 29390     29390 0 45837 sub next_tokens { return $_[0]->{next_tokens} }
36 66037     66037 0 135953 sub line { return $_[0]->{line} }
37 0     0 0 0 sub set_line { $_[0]->{line} = $_[1] }
38 20403     20403 0 29570 sub offset { return $_[0]->{offset} }
39 20364     20364 0 31449 sub set_offset { $_[0]->{offset} = $_[1] }
40 6038     6038 0 9824 sub inc_line { return $_[0]->{line}++ }
41 19345     19345 0 39011 sub context { return $_[0]->{context} }
42 2969     2969 0 5255 sub set_context { $_[0]->{context} = $_[1] }
43 24756     24756 0 59803 sub flowcontext { return $_[0]->{flowcontext} }
44 2315     2315 0 4002 sub set_flowcontext { $_[0]->{flowcontext} = $_[1] }
45 4125     4125 0 11928 sub block { return $_[0]->{block} }
46 6049     6049 0 9984 sub set_block { $_[0]->{block} = $_[1] }
47              
48             my $RE_WS = '[\t ]';
49             my $RE_LB = '[\r\n]';
50             my $RE_DOC_END = qr/\A(\.\.\.)(?=$RE_WS|$)/m;
51             my $RE_DOC_START = qr/\A(---)(?=$RE_WS|$)/m;
52             my $RE_EOL = qr/\A($RE_WS+#.*|$RE_WS+)\z/;
53             #my $RE_COMMENT_EOL = qr/\A(#.*)?(?:$RE_LB|\z)/;
54              
55             #ns-word-char ::= ns-dec-digit | ns-ascii-letter | “-”
56             my $RE_NS_WORD_CHAR = '[0-9A-Za-z-]';
57             my $RE_URI_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'. q{[0-9A-Za-z#;/?:@&=+$,_.!*'\(\)\[\]-]} . ')';
58             my $RE_NS_TAG_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'. q{[0-9A-Za-z#;/?:@&=+$_.~*'\(\)-]} . ')';
59              
60             # [#x21-#x7E] /* 8 bit */
61             # | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */
62             # | [#x10000-#x10FFFF] /* 32 bit */
63              
64             #nb-char ::= c-printable - b-char - c-byte-order-mark
65             #my $RE_NB_CHAR = '[\x21-\x7E]';
66             my $RE_ANCHOR_CAR = '[\x21-\x2B\x2D-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
67              
68             my $RE_PLAIN_START = '[\x21\x22\x24-\x39\x3B-\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
69             my $RE_PLAIN_END = '[\x21-\x39\x3B-\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]';
70             my $RE_PLAIN_FIRST = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
71              
72             my $RE_PLAIN_START_FLOW = '[\x21\x22\x24-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
73             my $RE_PLAIN_END_FLOW = '[\x21-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]';
74             my $RE_PLAIN_FIRST_FLOW = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
75             # c-indicators
76             #! 21
77             #" 22
78             ## 23
79             #% 25
80             #& 26
81             #' 27
82             #* 2A
83             #, 2C FLOW
84             #- 2D XX
85             #: 3A XX
86             #> 3E
87             #? 3F XX
88             #@ 40
89             #[ 5B FLOW
90             #] 5D FLOW
91             #` 60
92             #{ 7B FLOW
93             #| 7C
94             #} 7D FLOW
95              
96              
97             my $RE_PLAIN_WORD = "(?::+$RE_PLAIN_END|$RE_PLAIN_START)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*";
98             my $RE_PLAIN_FIRST_WORD = "(?:[:?-]+$RE_PLAIN_END|$RE_PLAIN_FIRST)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*";
99             my $RE_PLAIN_WORDS = "(?:$RE_PLAIN_FIRST_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)";
100             my $RE_PLAIN_WORDS2 = "(?:$RE_PLAIN_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)";
101              
102             my $RE_PLAIN_WORD_FLOW = "(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_START_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*";
103             my $RE_PLAIN_FIRST_WORD_FLOW = "(?:[:?-]+$RE_PLAIN_END_FLOW|$RE_PLAIN_FIRST_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*";
104             my $RE_PLAIN_WORDS_FLOW = "(?:$RE_PLAIN_FIRST_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)";
105             my $RE_PLAIN_WORDS_FLOW2 = "(?:$RE_PLAIN_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)";
106              
107              
108             #c-secondary-tag-handle ::= “!” “!”
109             #c-named-tag-handle ::= “!” ns-word-char+ “!”
110             #ns-tag-char ::= ns-uri-char - “!” - c-flow-indicator
111             #ns-global-tag-prefix ::= ns-tag-char ns-uri-char*
112             #c-ns-local-tag-prefix ::= “!” ns-uri-char*
113             my $RE_TAG = "!(?:$RE_NS_WORD_CHAR*!$RE_NS_TAG_CHAR+|$RE_NS_TAG_CHAR+|<$RE_URI_CHAR+>|)";
114              
115             #c-ns-anchor-property ::= “&” ns-anchor-name
116             #ns-char ::= nb-char - s-white
117             #ns-anchor-char ::= ns-char - c-flow-indicator
118             #ns-anchor-name ::= ns-anchor-char+
119              
120             my $RE_SEQSTART = qr/\A(-)(?=$RE_WS|$)/m;
121             my $RE_COMPLEX = qr/(\?)(?=$RE_WS|$)/m;
122             my $RE_COMPLEXCOLON = qr/\A(:)(?=$RE_WS|$)/m;
123             my $RE_ANCHOR = "&$RE_ANCHOR_CAR+";
124             my $RE_ALIAS = "\\*$RE_ANCHOR_CAR+";
125              
126              
127             my %REGEXES = (
128             ANCHOR => qr{($RE_ANCHOR)},
129             TAG => qr{($RE_TAG)},
130             ALIAS => qr{($RE_ALIAS)},
131             SINGLEQUOTED => qr{(?:''|[^'\r\n]+)*},
132             );
133              
134             sub _fetch_next_line {
135 9540     9540   16218 my ($self) = @_;
136 9540         16805 my $next_line = $self->next_line;
137 9540 100       18235 if (defined $next_line ) {
138 706         1307 return $next_line;
139             }
140              
141 8834         15819 my $line = $self->reader->readline;
142 8834 100       20270 unless (defined $line) {
143 2796         6685 $self->set_next_line(undef);
144 2796         5640 return;
145             }
146 6038         14746 $self->set_block(1);
147 6038         12881 $self->inc_line;
148 6038 50       22946 $line =~ m/\A( *)([^\r\n]*)([\r\n]|\z)/ or die "Unexpected";
149 6038         20728 $next_line = [ $1, $2, $3 ];
150 6038         14583 $self->set_next_line($next_line);
151             # $ESCAPE_CHAR from YAML.pm
152 6038 100       15470 if ($line =~ tr/\x00-\x08\x0b-\x0c\x0e-\x1f//) {
153 29         60 $self->exception("Control characters are not allowed");
154             }
155              
156 6009         11803 return $next_line;
157             }
158              
159             my %TOKEN_NAMES = (
160             '"' => 'DOUBLEQUOTE',
161             "'" => 'SINGLEQUOTE',
162             '|' => 'LITERAL',
163             '>' => 'FOLDED',
164             '!' => 'TAG',
165             '*' => 'ALIAS',
166             '&' => 'ANCHOR',
167             ':' => 'COLON',
168             '-' => 'DASH',
169             '?' => 'QUESTION',
170             '[' => 'FLOWSEQ_START',
171             ']' => 'FLOWSEQ_END',
172             '{' => 'FLOWMAP_START',
173             '}' => 'FLOWMAP_END',
174             ',' => 'FLOW_COMMA',
175             '---' => 'DOC_START',
176             '...' => 'DOC_END',
177             );
178              
179              
180             sub fetch_next_tokens {
181 7260     7260 0 11989 my ($self) = @_;
182 7260         13309 my $next = $self->next_tokens;
183 7260 100       15014 return $next if @$next;
184              
185 7123         13053 my $next_line = $self->_fetch_next_line;
186 7094 100       13843 if (not $next_line) {
187 1669         4074 return [];
188             }
189              
190 5425         9041 my $spaces = $next_line->[0];
191 5425         8488 my $yaml = \$next_line->[1];
192 5425 100       13154 if (not length $$yaml) {
193 67         291 $self->_push_tokens([ EOL => join('', @$next_line), $self->line ]);
194 67         201 $self->set_next_line(undef);
195 67         185 return $next;
196             }
197 5358 100       13650 if (substr($$yaml, 0, 1) eq '#') {
198 61         215 $self->_push_tokens([ EOL => join('', @$next_line), $self->line ]);
199 61         189 $self->set_next_line(undef);
200 61         200 return $next;
201             }
202 5297 100 100     19008 if (not $spaces and substr($$yaml, 0, 1) eq "%") {
203 67         274 $self->_fetch_next_tokens_directive($yaml, $next_line->[2]);
204 67         225 $self->set_context(0);
205 67         161 $self->set_next_line(undef);
206 67         214 return $next;
207             }
208 5230 100 100     36512 if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) {
    100 100        
209 1497         5791 $self->_push_tokens([ $TOKEN_NAMES{ $1 } => $1, $self->line ]);
210             }
211             elsif ($self->flowcontext and $$yaml =~ m/\A[ \t]+(#.*)?\z/) {
212 50         231 $self->_push_tokens([ EOL => join('', @$next_line), $self->line ]);
213 50         180 $self->set_next_line(undef);
214 50         155 return $next;
215             }
216             else {
217 3683         7329 $self->_push_tokens([ SPACE => $spaces, $self->line ]);
218             }
219              
220 5180         12923 my $partial = $self->_fetch_next_tokens($next_line);
221 5178 100       10513 unless ($partial) {
222 1544         3099 $self->set_next_line(undef);
223             }
224 5178         14280 return $next;
225             }
226              
227             my %ANCHOR_ALIAS_TAG = ( '&' => 1, '*' => 1, '!' => 1 );
228             my %BLOCK_SCALAR = ( '|' => 1, '>' => 1 );
229             my %COLON_DASH_QUESTION = ( ':' => 1, '-' => 1, '?' => 1 );
230             my %QUOTED = ( '"' => 1, "'" => 1 );
231             my %FLOW = ( '{' => 1, '[' => 1, '}' => 1, ']' => 1, ',' => 1 );
232             my %CONTEXT = ( '"' => 1, "'" => 1, '>' => 1, '|' => 1 );
233              
234             my $RE_ESCAPES = qr{(?:
235             \\([ \\\/_0abefnrtvLNP\t"]) | \\x([0-9a-fA-F]{2})
236             | \\u([A-Fa-f0-9]{4}) | \\U([A-Fa-f0-9]{4,8})
237             )}x;
238             my %CONTROL = (
239             '\\' => '\\', '/' => '/', n => "\n", t => "\t", r => "\r", b => "\b",
240             'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f", 'v' => "\x0b", "\t" => "\t",
241             'P' => "\x{2029}", L => "\x{2028}", 'N' => "\x85",
242             '0' => "\0", '_' => "\xa0", ' ' => ' ', q/"/ => q/"/,
243             );
244              
245             sub _fetch_next_tokens {
246 8457     8457   10217 TRACE and warn __PACKAGE__.':'.__LINE__.": _fetch_next_tokens\n";
247 8457         13378 my ($self, $next_line) = @_;
248              
249 8457         13283 my $yaml = \$next_line->[1];
250 8457         13546 my $eol = $next_line->[2];
251              
252 8457         10928 my @tokens;
253              
254 8457         10505 while (1) {
255 21305 100       40914 unless (length $$yaml) {
256 1960         3650 push @tokens, ( EOL => $eol, $self->line );
257 1960         4904 $self->_push_tokens(\@tokens);
258 1960         5388 return;
259             }
260 19345         32024 my $first = substr($$yaml, 0, 1);
261 19345         24285 my $plain = 0;
262              
263 19345 100       31874 if ($self->context) {
264 1451 100       6406 if ($$yaml =~ s/\A($RE_WS*)://) {
265 1 50       12 push @tokens, ( WS => $1, $self->line ) if $1;
266 1         6 push @tokens, ( COLON => ':', $self->line );
267 1         4 $self->set_context(0);
268 1         2 next;
269             }
270 1450 50       4742 if ($$yaml =~ s/\A($RE_WS*(?: #.*))\z//) {
271 0         0 push @tokens, ( EOL => $1 . $eol, $self->line );
272 0         0 $self->_push_tokens(\@tokens);
273 0         0 return;
274             }
275 1450         2706 $self->set_context(0);
276             }
277 19344 100 100     70334 if ($CONTEXT{ $first }) {
    100          
    100          
    100          
    100          
278 2594         4798 push @tokens, ( CONTEXT => $first, $self->line );
279 2594         6193 $self->_push_tokens(\@tokens);
280 2594         6922 return 1;
281             }
282             elsif ($COLON_DASH_QUESTION{ $first }) {
283 4450         7192 my $token_name = $TOKEN_NAMES{ $first };
284 4450 100 66     51325 if ($$yaml =~ s/\A\Q$first\E($RE_WS+|\z)//) {
    50          
285 4329         10485 my $after = $1;
286 4329 100 100     8201 if (not $self->flowcontext and not $self->block) {
287 2         6 push @tokens, ERROR => $first . $after, $self->line;
288 2         6 $self->_push_tokens(\@tokens);
289 2         8 $self->exception("Tabs can not be used for indentation");
290             }
291 4327 100       10814 if ($after =~ tr/\t//) {
292 11         39 $self->set_block(0);
293             }
294 4327         7387 my $token_name = $TOKEN_NAMES{ $first };
295 4327         8296 push @tokens, ( $token_name => $first, $self->line );
296 4327 50       10106 if (not defined $1) {
297 0         0 push @tokens, ( EOL => $eol, $self->line );
298 0         0 $self->_push_tokens(\@tokens);
299 0         0 return;
300             }
301 4327         6510 my $ws = $1;
302 4327 100       15415 if ($$yaml =~ s/\A(#.*|)\z//) {
303 770         2226 push @tokens, ( EOL => $ws . $1 . $eol, $self->line );
304 770         2013 $self->_push_tokens(\@tokens);
305 770         2688 return;
306             }
307 3557         7045 push @tokens, ( WS => $ws, $self->line );
308 3557         8878 next;
309             }
310             elsif ($self->flowcontext and $$yaml =~ s/\A:(?=[,\{\}\[\]])//) {
311 0         0 push @tokens, ( $token_name => $first, $self->line );
312 0         0 next;
313             }
314 121         255 $plain = 1;
315             }
316             elsif ($ANCHOR_ALIAS_TAG{ $first }) {
317 1935         3215 my $token_name = $TOKEN_NAMES{ $first };
318 1935         3048 my $REGEX = $REGEXES{ $token_name };
319 1935 50       60407 if ($$yaml =~ s/\A$REGEX//) {
320 1935         5726 push @tokens, ( $token_name => $1, $self->line );
321             }
322             else {
323 0         0 push @tokens, ( "Invalid $token_name" => $$yaml, $self->line );
324 0         0 $self->_push_tokens(\@tokens);
325 0         0 return;
326             }
327             }
328             elsif ($first eq ' ' or $first eq "\t") {
329 3785 50       21446 if ($$yaml =~ s/\A($RE_WS+)//) {
330 3785         7881 my $ws = $1;
331 3785 100       13373 if ($$yaml =~ s/\A((?:#.*)?\z)//) {
332 19         70 push @tokens, ( EOL => $ws . $1 . $eol, $self->line );
333 19         55 $self->_push_tokens(\@tokens);
334 19         72 return;
335             }
336 3766         7509 push @tokens, ( WS => $ws, $self->line );
337             }
338             }
339             elsif ($FLOW{ $first }) {
340 3589         8191 push @tokens, ( $TOKEN_NAMES{ $first } => $first, $self->line );
341 3589         8962 substr($$yaml, 0, 1, '');
342 3589         6409 my $flowcontext = $self->flowcontext;
343 3589 100 100     15753 if ($first eq '{' or $first eq '[') {
    100 100        
344 1158         2165 $self->set_flowcontext(++$flowcontext);
345             }
346             elsif ($first eq '}' or $first eq ']') {
347 1157         2259 $self->set_flowcontext(--$flowcontext);
348             }
349             }
350             else {
351 2991         4753 $plain = 1;
352             }
353              
354 12402 100       24654 if ($plain) {
355 3112         5643 push @tokens, ( CONTEXT => '', $self->line );
356 3112         7332 $self->_push_tokens(\@tokens);
357 3112         8665 return 1;
358             }
359              
360             }
361              
362 0         0 return;
363             }
364              
365             sub fetch_plain {
366 3109     3109 0 6248 my ($self, $indent, $context) = @_;
367 3109         5880 my $next_line = $self->next_line;
368 3109         5548 my $yaml = \$next_line->[1];
369 3109         4587 my $eol = $next_line->[2];
370 3109         4868 my $REGEX = $RE_PLAIN_WORDS;
371 3109 100       4967 if ($self->flowcontext) {
372 222         463 $REGEX = $RE_PLAIN_WORDS_FLOW;
373             }
374              
375 3109         4403 my @tokens;
376 3109 100       50872 unless ($$yaml =~ s/\A($REGEX)//) {
377 2         9 $self->_push_tokens(\@tokens);
378 2         7 $self->exception("Invalid plain scalar");
379             }
380 3107         9645 my $plain = $1;
381 3107         7168 push @tokens, ( PLAIN => $plain, $self->line );
382              
383 3107 100       19096 if ($$yaml =~ s/\A(?:($RE_WS+#.*)|($RE_WS*))\z//) {
384 1774 100       4616 if (defined $1) {
385 1         7 push @tokens, ( EOL => $1 . $eol, $self->line );
386 1         5 $self->_push_tokens(\@tokens);
387 1         3 $self->set_next_line(undef);
388 1         4 return;
389             }
390             else {
391 1773         5006 push @tokens, ( EOL => $2. $eol, $self->line );
392 1773         3931 $self->set_next_line(undef);
393             }
394             }
395             else {
396 1333         4972 $self->_push_tokens(\@tokens);
397 1333         3698 my $partial = $self->_fetch_next_tokens($next_line);
398 1333 100       2860 if (not $partial) {
399 303         664 $self->set_next_line(undef);
400             }
401 1333         4848 return;
402             }
403              
404 1773         3232 my $RE2 = $RE_PLAIN_WORDS2;
405 1773 50       3025 if ($self->flowcontext) {
406 0         0 $RE2 = $RE_PLAIN_WORDS_FLOW2;
407             }
408 1773         2635 my $fetch_next = 0;
409 1773         4450 my @lines = ($plain);
410 1773         2416 my @next;
411 1773         2515 LOOP: while (1) {
412 1778         3606 $next_line = $self->_fetch_next_line;
413 1778 100       4013 if (not $next_line) {
414 1086         2647 last LOOP;
415             }
416 692         1271 my $spaces = $next_line->[0];
417 692         1160 my $yaml = \$next_line->[1];
418 692         1076 my $eol = $next_line->[2];
419              
420 692 100       1533 if (not length $$yaml) {
421 5         17 push @tokens, ( EOL => $spaces . $eol, $self->line );
422 5         16 $self->set_next_line(undef);
423 5         8 push @lines, '';
424 5         22 next LOOP;
425             }
426              
427 687 100 100     4733 if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) {
428 132         516 push @next, $TOKEN_NAMES{ $1 } => $1, $self->line;
429 132         224 $fetch_next = 1;
430 132         314 last LOOP;
431             }
432 555 50       1367 if ((length $spaces) < $indent) {
433 555         1202 last LOOP;
434             }
435              
436 0         0 my $ws = '';
437 0 0       0 if ($$yaml =~ s/\A($RE_WS+)//) {
438 0         0 $ws = $1;
439             }
440 0 0       0 if (not length $$yaml) {
441 0         0 push @tokens, ( EOL => $spaces . $ws . $eol, $self->line );
442 0         0 $self->set_next_line(undef);
443 0         0 push @lines, '';
444 0         0 next LOOP;
445             }
446 0 0       0 if ($$yaml =~ s/\A(#.*)\z//) {
447 0         0 push @tokens, ( EOL => $spaces . $ws . $1 . $eol, $self->line );
448 0         0 $self->set_next_line(undef);
449 0         0 last LOOP;
450             }
451              
452 0 0       0 if ($$yaml =~ s/\A($RE2)//) {
453 0         0 push @tokens, INDENT => $spaces, $self->line;
454 0         0 push @tokens, WS => $ws, $self->line;
455 0         0 push @tokens, PLAIN => $1, $self->line;
456 0         0 push @lines, $1;
457 0         0 my $ws = '';
458 0 0       0 if ($$yaml =~ s/\A($RE_WS+)//) {
459 0         0 $ws = $1;
460             }
461 0 0       0 if (not length $$yaml) {
462 0         0 push @tokens, EOL => $ws . $eol, $self->line;
463 0         0 $self->set_next_line(undef);
464 0         0 next LOOP;
465             }
466              
467 0 0       0 if ($$yaml =~ s/\A(#.*)\z//) {
468 0         0 push @tokens, EOL => $ws . $1 . $eol, $self->line;
469 0         0 $self->set_next_line(undef);
470 0         0 last LOOP;
471             }
472             else {
473 0 0       0 push @tokens, WS => $ws, $self->line if $ws;
474 0         0 $fetch_next = 1;
475             }
476             }
477             else {
478 0         0 push @tokens, SPACE => $spaces, $self->line;
479 0         0 push @tokens, WS => $ws, $self->line;
480 0 0       0 if ($self->flowcontext) {
481 0         0 $fetch_next = 1;
482             }
483             else {
484 0         0 push @tokens, ERROR => $$yaml, $self->line;
485             }
486             }
487              
488 0         0 last LOOP;
489              
490             }
491             # remove empty lines at the end
492 1773   66     4906 while (@lines > 1 and $lines[-1] eq '') {
493 5         28 pop @lines;
494             }
495 1773 50       3542 if (@lines > 1) {
496 0         0 my $value = YAML::PP::Render->render_multi_val(\@lines);
497 0         0 my @eol;
498 0 0       0 if ($tokens[-3] eq 'EOL') {
499 0         0 @eol = splice @tokens, -3;
500             }
501 0         0 $self->push_subtokens( { name => 'PLAIN_MULTI', value => $value }, \@tokens);
502 0         0 $self->_push_tokens([ @eol, @next ]);
503             }
504             else {
505 1773         5988 $self->_push_tokens([ @tokens, @next ]);
506             }
507 1773         4620 @tokens = ();
508 1773 100       3570 if ($fetch_next) {
509 132         353 my $partial = $self->_fetch_next_tokens($next_line);
510 132 100       365 if (not $partial) {
511 124         309 $self->set_next_line(undef);
512             }
513             }
514 1773         6463 return;
515             }
516              
517             sub fetch_block {
518 195     195 0 481 my ($self, $indent, $context) = @_;
519 195         398 my $next_line = $self->next_line;
520 195         352 my $yaml = \$next_line->[1];
521 195         311 my $eol = $next_line->[2];
522              
523 195         275 my @tokens;
524 195         353 my $token_name = $TOKEN_NAMES{ $context };
525 195 50       1733 $$yaml =~ s/\A\Q$context\E// or die "Unexpected";
526 195         630 push @tokens, ( $token_name => $context, $self->line );
527 195         349 my $current_indent = $indent;
528 195         301 my $started = 0;
529 195         288 my $set_indent = 0;
530 195         388 my $chomp = '';
531 195 100       1031 if ($$yaml =~ s/\A([1-9])([+-]?)//) {
    100          
532 52         125 push @tokens, ( BLOCK_SCALAR_INDENT => $1, $self->line );
533 52         112 $set_indent = $1;
534 52 100       169 $chomp = $2 if $2;
535 52 100       138 push @tokens, ( BLOCK_SCALAR_CHOMP => $2, $self->line ) if $2;
536             }
537             elsif ($$yaml =~ s/\A([+-])([1-9])?//) {
538 61         158 push @tokens, ( BLOCK_SCALAR_CHOMP => $1, $self->line );
539 61         123 $chomp = $1;
540 61 50       157 push @tokens, ( BLOCK_SCALAR_INDENT => $2, $self->line ) if $2;
541 61 50       140 $set_indent = $2 if $2;
542             }
543 195 100       420 if ($set_indent) {
544 52         77 $started = 1;
545 52 100       119 $indent-- if $indent > 0;
546 52         113 $current_indent = $indent + $set_indent;
547             }
548 195 100       594 if (not length $$yaml) {
    50          
549 193         449 push @tokens, ( EOL => $eol, $self->line );
550             }
551             elsif ($$yaml =~ s/\A($RE_WS*(?:$RE_WS#.*|))\z//) {
552 0         0 push @tokens, ( EOL => $1 . $eol, $self->line );
553             }
554             else {
555 2         17 $self->_push_tokens(\@tokens);
556 2         13 $self->exception("Invalid block scalar");
557             }
558              
559 193         311 my @lines;
560 193         265 while (1) {
561 521         1160 $self->set_next_line(undef);
562 521         898 $next_line = $self->_fetch_next_line;
563 521 100       1086 if (not $next_line) {
564 41         77 last;
565             }
566 480         754 my $spaces = $next_line->[0];
567 480         647 my $content = $next_line->[1];
568 480         672 my $eol = $next_line->[2];
569 480 100 100     2122 if (not $spaces and $content =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) {
570 60         147 last;
571             }
572 420 100       942 if ((length $spaces) < $current_indent) {
573 179 100       387 if (length $content) {
574 92 100       277 if ($content =~ m/\A\t/) {
575 1         18 $self->_push_tokens(\@tokens);
576 1         4 $self->exception("Invalid block scalar");
577             }
578 91         208 last;
579             }
580             else {
581 87         164 push @lines, '';
582 87         246 push @tokens, ( EOL => $spaces . $eol, $self->line );
583 87         181 next;
584             }
585             }
586 241 100       499 if ((length $spaces) > $current_indent) {
587 203 100       448 if ($started) {
588 106         538 ($spaces, my $more_spaces) = unpack "a${current_indent}a*", $spaces;
589 106         237 $content = $more_spaces . $content;
590             }
591             }
592 241 50       590 unless (length $content) {
593 0         0 push @lines, '';
594 0         0 push @tokens, ( INDENT => $spaces, $self->line, EOL => $eol, $self->line );
595 0 0       0 unless ($started) {
596 0         0 $current_indent = length $spaces;
597             }
598 0         0 next;
599             }
600 241 100       498 unless ($started) {
601 119         171 $started = 1;
602 119         171 $current_indent = length $spaces;
603             }
604 241         431 push @lines, $content;
605 241         516 push @tokens, (
606             INDENT => $spaces, $self->line,
607             BLOCK_SCALAR_CONTENT => $content, $self->line,
608             EOL => $eol, $self->line,
609             );
610             }
611 192         777 my $value = YAML::PP::Render->render_block_scalar($context, $chomp, \@lines);
612 192         567 my @eol = splice @tokens, -3;
613 192         882 $self->push_subtokens( { name => 'BLOCK_SCALAR', value => $value }, \@tokens );
614 192         711 $self->_push_tokens([ @eol ]);
615 192         968 return 0;
616             }
617              
618             sub fetch_quoted {
619 2399     2399 0 4606 my ($self, $indent, $context) = @_;
620 2399         4385 my $next_line = $self->next_line;
621 2399         4137 my $yaml = \$next_line->[1];
622 2399         3795 my $spaces = $next_line->[0];
623              
624 2399         3950 my $token_name = $TOKEN_NAMES{ $context };
625 2399 50       21298 $$yaml =~ s/\A\Q$context// or die "Unexpected";;
626 2399         6843 my @tokens = ( $token_name => $context, $self->line );
627              
628 2399         3834 my $start = 1;
629 2399         3056 my @values;
630 2399         3391 while (1) {
631              
632 2517 100       4603 unless ($start) {
633 118 50       259 $next_line = $self->_fetch_next_line or do {
634 0         0 for (my $i = 0; $i < @tokens; $i+= 3) {
635 0         0 my $token = $tokens[ $i + 1 ];
636 0 0       0 if (ref $token) {
637 0         0 $tokens[ $i + 1 ] = $token->{orig};
638             }
639             }
640 0         0 $self->_push_tokens(\@tokens);
641 0         0 $self->exception("Missing closing quote <$context> at EOF");
642             };
643 118         195 $start = 0;
644 118         199 $spaces = $next_line->[0];
645 118         230 $yaml = \$next_line->[1];
646              
647 118 50 33     560 if (not length $$yaml) {
    50          
    50          
648 0         0 push @tokens, ( EOL => $spaces . $next_line->[2], $self->line );
649 0         0 $self->set_next_line(undef);
650 0         0 push @values, { value => '', orig => '' };
651 0         0 next;
652             }
653             elsif (not $spaces and $$yaml =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) {
654 0         0 for (my $i = 0; $i < @tokens; $i+= 3) {
655 0         0 my $token = $tokens[ $i + 1 ];
656 0 0       0 if (ref $token) {
657 0         0 $tokens[ $i + 1 ] = $token->{orig};
658             }
659             }
660 0         0 $self->_push_tokens(\@tokens);
661 0         0 $self->exception("Missing closing quote <$context> or invalid document marker");
662             }
663             elsif ((length $spaces) < $indent) {
664 0         0 for (my $i = 0; $i < @tokens; $i+= 3) {
665 0         0 my $token = $tokens[ $i + 1 ];
666 0 0       0 if (ref $token) {
667 0         0 $tokens[ $i + 1 ] = $token->{orig};
668             }
669             }
670 0         0 $self->_push_tokens(\@tokens);
671 0         0 $self->exception("Wrong indendation or missing closing quote <$context>");
672             }
673              
674 118 100       696 if ($$yaml =~ s/\A($RE_WS+)//) {
675 18         64 $spaces .= $1;
676             }
677 118         301 push @tokens, ( WS => $spaces, $self->line );
678             }
679              
680 2517         5796 my $v = $self->_read_quoted_tokens($start, $context, $yaml, \@tokens);
681 2517         3931 push @values, $v;
682 2517 100       5419 if ($tokens[-3] eq $token_name) {
683 2399 100       4257 if ($start) {
684             $self->push_subtokens(
685             { name => 'QUOTED', value => $v->{value} }, \@tokens
686 2287         8423 );
687             }
688             else {
689 112         432 my $value = YAML::PP::Render->render_quoted($context, \@values);
690 112         435 $self->push_subtokens(
691             { name => 'QUOTED_MULTILINE', value => $value }, \@tokens
692             );
693             }
694 2399 100       4373 $self->set_context(1) if $self->flowcontext;
695 2399 100       5266 if (length $$yaml) {
696 1812         3560 my $partial = $self->_fetch_next_tokens($next_line);
697 1812 100       3546 if (not $partial) {
698 778         1516 $self->set_next_line(undef);
699             }
700 1812         8591 return 0;
701             }
702             else {
703 587         1351 @tokens = ();
704 587         1212 push @tokens, ( EOL => $next_line->[2], $self->line );
705 587         1535 $self->_push_tokens(\@tokens);
706 587         1508 $self->set_next_line(undef);
707 587         2966 return;
708             }
709             }
710 118         212 $tokens[-2] .= $next_line->[2];
711 118         290 $self->set_next_line(undef);
712 118         179 $start = 0;
713             }
714             }
715              
716             sub _read_quoted_tokens {
717 2517     2517   4974 my ($self, $start, $first, $yaml, $tokens) = @_;
718 2517         3423 my $quoted = '';
719 2517         3543 my $decoded = '';
720 2517         3763 my $token_name = $TOKEN_NAMES{ $first };
721 2517         3549 my $eol = '';
722 2517 100       4594 if ($first eq "'") {
723 1572         2289 my $regex = $REGEXES{SINGLEQUOTED};
724 1572 50       10872 if ($$yaml =~ s/\A($regex)//) {
725 1572         4114 $quoted .= $1;
726 1572         2341 $decoded .= $1;
727 1572         3211 $decoded =~ s/''/'/g;
728             }
729 1572 50       4115 unless (length $$yaml) {
730 0 0       0 if ($quoted =~ s/($RE_WS+)\z//) {
731 0         0 $eol = $1;
732 0         0 $decoded =~ s/($eol)\z//;
733             }
734             }
735             }
736             else {
737 945         1996 ($quoted, $decoded, $eol) = $self->_read_doublequoted($yaml);
738             }
739 2517         7553 my $value = { value => $decoded, orig => $quoted };
740              
741 2517 100       14257 if ($$yaml =~ s/\A$first//) {
742 2399 100       4816 if ($start) {
743 2287         5975 push @$tokens, ( $token_name . 'D' => $value, $self->line );
744             }
745             else {
746 112         337 push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line );
747             }
748 2399         4738 push @$tokens, ( $token_name => $first, $self->line );
749 2399         6293 return $value;
750             }
751 118 50       358 if (length $$yaml) {
752 0         0 push @$tokens, ( $token_name . 'D' => $value->{orig}, $self->line );
753 0         0 $self->_push_tokens($tokens);
754 0         0 $self->exception("Invalid quoted <$first> string");
755             }
756              
757 118         347 push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line );
758 118         264 push @$tokens, ( EOL => $eol, $self->line );
759              
760 118         250 return $value;
761             }
762              
763             sub _read_doublequoted {
764 945     945   1578 my ($self, $yaml) = @_;
765 945         1332 my $quoted = '';
766 945         1311 my $decoded = '';
767 945         1271 my $eol = '';
768 945         1196 while (1) {
769 2756         3666 my $last = 1;
770 2756 100       9326 if ($$yaml =~ s/\A([^"\\ \t]+)//) {
771 1557         3524 $quoted .= $1;
772 1557         2319 $decoded .= $1;
773 1557         2116 $last = 0;
774             }
775 2756 100       12258 if ($$yaml =~ s/\A($RE_ESCAPES)//) {
776 528         1225 $quoted .= $1;
777 528 100       1968 my $dec = defined $2 ? $CONTROL{ $2 }
    100          
    100          
778             : defined $3 ? chr hex $3
779             : defined $4 ? chr hex $4
780             : chr hex $5;
781 528         989 $decoded .= $dec;
782 528         954 $last = 0;
783             }
784 2756 100       8038 if ($$yaml =~ s/\A([ \t]+)//) {
785 711         1337 my $spaces = $1;
786 711 100       1476 if (length $$yaml) {
787 675         962 $quoted .= $spaces;
788 675         912 $decoded .= $spaces;
789 675         1045 $last = 0;
790             }
791             else {
792 36         76 $eol = $spaces;
793 36         83 last;
794             }
795             }
796 2720 100       6828 if ($$yaml =~ s/\A(\\)\z//) {
797 8         17 $quoted .= $1;
798 8         13 $decoded .= $1;
799 8         15 last;
800             }
801 2712 100       5830 last if $last;
802             }
803 945         3013 return ($quoted, $decoded, $eol);
804             }
805              
806             sub _fetch_next_tokens_directive {
807 67     67   152 my ($self, $yaml, $eol) = @_;
808 67         93 my @tokens;
809              
810 67         113 my $trailing_ws = '';
811 67   100     217 my $warn = $ENV{YAML_PP_RESERVED_DIRECTIVE} || 'warn';
812 67 100       1553 if ($$yaml =~ s/\A(\s*%YAML[ \t]+([0-9]+\.[0-9]+))//) {
    100          
    50          
813 47         113 my $dir = $1;
814 47         94 my $version = $2;
815 47 100       481 if ($$yaml =~ s/\A($RE_WS+)//) {
    50          
816 9         34 $trailing_ws = $1;
817             }
818             elsif (length $$yaml) {
819 0         0 push @tokens, ( 'Invalid directive' => $dir.$$yaml.$eol, $self->line );
820 0         0 $self->_push_tokens(\@tokens);
821 0         0 return;
822             }
823 47 50       206 if ($version !~ m/^1\.[12]$/) {
824 0 0       0 if ($warn eq 'warn') {
    0          
825 0         0 warn "Unsupported YAML version '$dir'";
826             }
827             elsif ($warn eq 'fatal') {
828 0         0 push @tokens, ( 'Unsupported YAML version' => $dir, $self->line );
829 0         0 $self->_push_tokens(\@tokens);
830 0         0 return;
831             }
832             }
833 47         141 push @tokens, ( YAML_DIRECTIVE => $dir, $self->line );
834             }
835             elsif ($$yaml =~ s/\A(\s*%TAG[ \t]+(!$RE_NS_WORD_CHAR*!|!)[ \t]+(tag:\S+|!$RE_URI_CHAR+))($RE_WS*)//) {
836 11         54 push @tokens, ( TAG_DIRECTIVE => $1, $self->line );
837             # TODO
838 11         29 my $tag_alias = $2;
839 11         42 my $tag_url = $3;
840 11         42 $trailing_ws = $4;
841             }
842             elsif ($$yaml =~ s/\A(\s*\A%(?:\w+).*)//) {
843 9         56 push @tokens, ( RESERVED_DIRECTIVE => $1, $self->line );
844 9 50       70 if ($warn eq 'warn') {
    50          
845 0         0 warn "Found reserved directive '$1'";
846             }
847             elsif ($warn eq 'fatal') {
848 0         0 die "Found reserved directive '$1'";
849             }
850             }
851             else {
852 0         0 push @tokens, ( 'Invalid directive' => $$yaml, $self->line );
853 0         0 push @tokens, ( EOL => $eol, $self->line );
854 0         0 $self->_push_tokens(\@tokens);
855 0         0 return;
856             }
857 67 100 33     308 if (not length $$yaml) {
    50          
    0          
858 58         150 push @tokens, ( EOL => $eol, $self->line );
859             }
860             elsif ($trailing_ws and $$yaml =~ s/\A(#.*)?\z//) {
861 9         59 push @tokens, ( EOL => "$trailing_ws$1$eol", $self->line );
862 9         46 $self->_push_tokens(\@tokens);
863 9         22 return;
864             }
865             elsif ($$yaml =~ s/\A([ \t]+#.*)?\z//) {
866 0         0 push @tokens, ( EOL => "$1$eol", $self->line );
867 0         0 $self->_push_tokens(\@tokens);
868 0         0 return;
869             }
870             else {
871 0         0 push @tokens, ( 'Invalid directive' => $trailing_ws.$$yaml, $self->line );
872 0         0 push @tokens, ( EOL => $eol, $self->line );
873             }
874 58         194 $self->_push_tokens(\@tokens);
875 58         152 return;
876             }
877              
878             sub _push_tokens {
879 17773     17773   30992 my ($self, $new_tokens) = @_;
880 17773         28964 my $next = $self->next_tokens;
881 17773         29837 my $line = $self->line;
882 17773         28425 my $column = $self->offset;
883              
884 17773         40509 for (my $i = 0; $i < @$new_tokens; $i += 3) {
885 36926         57381 my $value = $new_tokens->[ $i + 1 ];
886 36926         49604 my $name = $new_tokens->[ $i ];
887 36926         48252 my $line = $new_tokens->[ $i + 2 ];
888 36926         111434 my $push = {
889             name => $name,
890             line => $line,
891             column => $column,
892             value => $value,
893             };
894 36926 100       79421 $column += length $value unless $name eq 'CONTEXT';
895 36926         54072 push @$next, $push;
896 36926 100       91314 if ($name eq 'EOL') {
897 5553         13149 $column = 0;
898             }
899             }
900 17773         39996 $self->set_offset($column);
901 17773         27008 return $next;
902             }
903              
904             sub push_subtokens {
905 2591     2591 0 4943 my ($self, $token, $subtokens) = @_;
906 2591         4790 my $next = $self->next_tokens;
907 2591         4343 my $line = $self->line;
908 2591         4379 my $column = $self->offset;
909 2591         4166 $token->{column} = $column;
910 2591         4530 $token->{subtokens} = \my @sub;
911              
912 2591         6490 for (my $i = 0; $i < @$subtokens; $i+=3) {
913 8686         12246 my $name = $subtokens->[ $i ];
914 8686         14229 my $value = $subtokens->[ $i + 1 ];
915 8686         11110 my $line = $subtokens->[ $i + 2 ];
916 8686         21672 my $push = {
917             name => $subtokens->[ $i ],
918             line => $line,
919             column => $column,
920             };
921 8686 100       17199 if (ref $value eq 'HASH') {
922 2517         15719 %$push = ( %$push, %$value );
923 2517         6462 $column += length $value->{orig};
924             }
925             else {
926 6169         10623 $push->{value} = $value;
927 6169         8205 $column += length $value;
928             }
929 8686 100       15382 if ($push->{name} eq 'EOL') {
930 446         630 $column = 0;
931             }
932 8686         20096 push @sub, $push;
933             }
934 2591         4384 $token->{line} = $sub[0]->{line};
935 2591         4029 push @$next, $token;
936 2591         5837 $self->set_offset($column);
937 2591         4881 return $next;
938             }
939              
940             sub exception {
941 36     36 0 75 my ($self, $msg) = @_;
942 36         68 my $next = $self->next_tokens;
943 36         58 $next = [];
944 36 50       141 my $line = @$next ? $next->[0]->{line} : $self->line;
945 36         258 my @caller = caller(0);
946 36         99 my $yaml = '';
947 36 50       77 if (my $nl = $self->next_line) {
948 36         102 $yaml = join '', @$nl;
949 36         70 $yaml = $nl->[1];
950             }
951 36         85 my $e = YAML::PP::Exception->new(
952             line => $line,
953             column => $self->offset + 1,
954             msg => $msg,
955             next => $next,
956             where => $caller[1] . ' line ' . $caller[2],
957             yaml => $yaml,
958             );
959 36         847 croak $e;
960             }
961              
962             1;