File Coverage

blib/lib/YAML/PP/Lexer.pm
Criterion Covered Total %
statement 532 554 96.0
branch 244 260 93.8
condition 42 44 95.4
subroutine 35 36 97.2
pod 0 24 0.0
total 853 918 92.9


line stmt bran cond sub pod time code
1 42     42   305 use strict;
  42         82  
  42         1261  
2 42     42   213 use warnings;
  42         79  
  42         2738  
3             package YAML::PP::Lexer;
4              
5             our $VERSION = '0.036_002'; # TRIAL VERSION
6              
7 42 50   42   279 use constant TRACE => $ENV{YAML_PP_TRACE} ? 1 : 0;
  42         78  
  42         3616  
8 42 100 66 42   364 use constant DEBUG => ($ENV{YAML_PP_DEBUG} || $ENV{YAML_PP_TRACE}) ? 1 : 0;
  42         94  
  42         2968  
9              
10 42     42   24842 use YAML::PP::Grammar qw/ $GRAMMAR /;
  42         264  
  42         5754  
11 42     42   334 use Carp qw/ croak /;
  42         87  
  42         285781  
12              
13             sub new {
14 4667     4667 0 12091 my ($class, %args) = @_;
15             my $self = bless {
16             reader => $args{reader},
17 4667         12039 }, $class;
18 4667         12560 $self->init;
19 4667         16358 return $self;
20             }
21              
22             sub init {
23 19809     19809 0 31441 my ($self) = @_;
24 19809         35621 $self->{next_tokens} = [];
25 19809         30077 $self->{next_line} = undef;
26 19809         29044 $self->{line} = 0;
27 19809         27068 $self->{offset} = 0;
28 19809         33945 $self->{flowcontext} = 0;
29             }
30              
31 74536     74536 0 123694 sub next_line { return $_[0]->{next_line} }
32 70098     70098 0 113253 sub set_next_line { $_[0]->{next_line} = $_[1] }
33 41006     41006 0 109374 sub reader { return $_[0]->{reader} }
34 7571     7571 0 23392 sub set_reader { $_[0]->{reader} = $_[1] }
35 130659     130659 0 214568 sub next_tokens { return $_[0]->{next_tokens} }
36 272977     272977 0 562088 sub line { return $_[0]->{line} }
37 0     0 0 0 sub set_line { $_[0]->{line} = $_[1] }
38 91162     91162 0 138256 sub offset { return $_[0]->{offset} }
39 91060     91060 0 146023 sub set_offset { $_[0]->{offset} = $_[1] }
40 29191     29191 0 48121 sub inc_line { return $_[0]->{line}++ }
41 67864     67864 0 140089 sub context { return $_[0]->{context} }
42 3870     3870 0 7449 sub set_context { $_[0]->{context} = $_[1] }
43 107289     107289 0 273869 sub flowcontext { return $_[0]->{flowcontext} }
44 4931     4931 0 8772 sub set_flowcontext { $_[0]->{flowcontext} = $_[1] }
45 17857     17857 0 54051 sub block { return $_[0]->{block} }
46 29271     29271 0 49937 sub set_block { $_[0]->{block} = $_[1] }
47              
48             my $RE_WS = '[\t ]';
49             my $RE_LB = '[\r\n]';
50             my $RE_DOC_END = qr/\A(\.\.\.)(?=$RE_WS|$)/m;
51             my $RE_DOC_START = qr/\A(---)(?=$RE_WS|$)/m;
52             my $RE_EOL = qr/\A($RE_WS+#.*|$RE_WS+)\z/;
53             #my $RE_COMMENT_EOL = qr/\A(#.*)?(?:$RE_LB|\z)/;
54              
55             #ns-word-char ::= ns-dec-digit | ns-ascii-letter | “-”
56             my $RE_NS_WORD_CHAR = '[0-9A-Za-z-]';
57             my $RE_URI_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'. q{[0-9A-Za-z#;/?:@&=+$,_.!*'\(\)\[\]-]} . ')';
58             my $RE_NS_TAG_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'. q{[0-9A-Za-z#;/?:@&=+$_.~*'\(\)-]} . ')';
59              
60             # [#x21-#x7E] /* 8 bit */
61             # | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */
62             # | [#x10000-#x10FFFF] /* 32 bit */
63              
64             #nb-char ::= c-printable - b-char - c-byte-order-mark
65             #my $RE_NB_CHAR = '[\x21-\x7E]';
66             my $RE_ANCHOR_CAR = '[\x21-\x2B\x2D-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
67              
68             my $RE_PLAIN_START = '[\x21\x22\x24-\x39\x3B-\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
69             my $RE_PLAIN_END = '[\x21-\x39\x3B-\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]';
70             my $RE_PLAIN_FIRST = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
71              
72             my $RE_PLAIN_START_FLOW = '[\x21\x22\x24-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
73             my $RE_PLAIN_END_FLOW = '[\x21-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]';
74             my $RE_PLAIN_FIRST_FLOW = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
75             # c-indicators
76             #! 21
77             #" 22
78             ## 23
79             #% 25
80             #& 26
81             #' 27
82             #* 2A
83             #, 2C FLOW
84             #- 2D XX
85             #: 3A XX
86             #> 3E
87             #? 3F XX
88             #@ 40
89             #[ 5B FLOW
90             #] 5D FLOW
91             #` 60
92             #{ 7B FLOW
93             #| 7C
94             #} 7D FLOW
95              
96              
97             my $RE_PLAIN_WORD = "(?::+$RE_PLAIN_END|$RE_PLAIN_START)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*";
98             my $RE_PLAIN_FIRST_WORD = "(?:[:?-]+$RE_PLAIN_END|$RE_PLAIN_FIRST)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*";
99             my $RE_PLAIN_WORDS = "(?:$RE_PLAIN_FIRST_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)";
100             my $RE_PLAIN_WORDS2 = "(?:$RE_PLAIN_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)";
101              
102             my $RE_PLAIN_WORD_FLOW = "(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_START_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*";
103             my $RE_PLAIN_FIRST_WORD_FLOW = "(?:[:?-]+$RE_PLAIN_END_FLOW|$RE_PLAIN_FIRST_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*";
104             my $RE_PLAIN_WORDS_FLOW = "(?:$RE_PLAIN_FIRST_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)";
105             my $RE_PLAIN_WORDS_FLOW2 = "(?:$RE_PLAIN_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)";
106              
107              
108             #c-secondary-tag-handle ::= “!” “!”
109             #c-named-tag-handle ::= “!” ns-word-char+ “!”
110             #ns-tag-char ::= ns-uri-char - “!” - c-flow-indicator
111             #ns-global-tag-prefix ::= ns-tag-char ns-uri-char*
112             #c-ns-local-tag-prefix ::= “!” ns-uri-char*
113             my $RE_TAG = "!(?:$RE_NS_WORD_CHAR*!$RE_NS_TAG_CHAR+|$RE_NS_TAG_CHAR+|<$RE_URI_CHAR+>|)";
114              
115             #c-ns-anchor-property ::= “&” ns-anchor-name
116             #ns-char ::= nb-char - s-white
117             #ns-anchor-char ::= ns-char - c-flow-indicator
118             #ns-anchor-name ::= ns-anchor-char+
119              
120             my $RE_SEQSTART = qr/\A(-)(?=$RE_WS|$)/m;
121             my $RE_COMPLEX = qr/(\?)(?=$RE_WS|$)/m;
122             my $RE_COMPLEXCOLON = qr/\A(:)(?=$RE_WS|$)/m;
123             my $RE_ANCHOR = "&$RE_ANCHOR_CAR+";
124             my $RE_ALIAS = "\\*$RE_ANCHOR_CAR+";
125              
126              
127             my %REGEXES = (
128             ANCHOR => qr{($RE_ANCHOR)},
129             TAG => qr{($RE_TAG)},
130             ALIAS => qr{($RE_ALIAS)},
131             SINGLEQUOTED => qr{(?:''|[^'\r\n]+)*},
132             );
133              
134             sub _fetch_next_line {
135 47452     47452   76255 my ($self) = @_;
136 47452         84568 my $next_line = $self->next_line;
137 47452 100       93772 if (defined $next_line ) {
138 6446         12437 return $next_line;
139             }
140              
141 41006         70751 my $line = $self->reader->readline;
142 41006 100       90472 unless (defined $line) {
143 11815         27296 $self->set_next_line(undef);
144 11815         22679 return;
145             }
146 29191         70448 $self->set_block(1);
147 29191         65119 $self->inc_line;
148 29191 50       107618 $line =~ m/\A( *)([^\r\n]*)([\r\n]|\z)/ or die "Unexpected";
149 29191         95702 $next_line = [ $1, $2, $3 ];
150 29191         69514 $self->set_next_line($next_line);
151             # $ESCAPE_CHAR from YAML.pm
152 29191 100       72928 if ($line =~ tr/\x00-\x08\x0b-\x0c\x0e-\x1f//) {
153 29         70 $self->exception("Control characters are not allowed");
154             }
155              
156 29162         60648 return $next_line;
157             }
158              
159             my %TOKEN_NAMES = (
160             '"' => 'DOUBLEQUOTE',
161             "'" => 'SINGLEQUOTE',
162             '|' => 'LITERAL',
163             '>' => 'FOLDED',
164             '!' => 'TAG',
165             '*' => 'ALIAS',
166             '&' => 'ANCHOR',
167             ':' => 'COLON',
168             '-' => 'DASH',
169             '?' => 'QUESTION',
170             '[' => 'FLOWSEQ_START',
171             ']' => 'FLOWSEQ_END',
172             '{' => 'FLOWMAP_START',
173             '}' => 'FLOWMAP_END',
174             ',' => 'FLOW_COMMA',
175             '---' => 'DOC_START',
176             '...' => 'DOC_END',
177             );
178              
179              
180             sub fetch_next_tokens {
181 31903     31903 0 53139 my ($self) = @_;
182 31903         56441 my $next = $self->next_tokens;
183 31903 100       67747 return $next if @$next;
184              
185 31103         58708 my $next_line = $self->_fetch_next_line;
186 31074 100       61924 if (not $next_line) {
187 7425         17766 return [];
188             }
189              
190 23649         38977 my $spaces = $next_line->[0];
191 23649         37415 my $yaml = \$next_line->[1];
192 23649 100       58436 if (not length $$yaml) {
193 157         693 $self->_push_tokens([ EOL => join('', @$next_line), $self->line ]);
194 157         507 $self->set_next_line(undef);
195 157         496 return $next;
196             }
197 23492 100       59252 if (substr($$yaml, 0, 1) eq '#') {
198 546         2384 $self->_push_tokens([ EOL => join('', @$next_line), $self->line ]);
199 546         1777 $self->set_next_line(undef);
200 546         1746 return $next;
201             }
202 22946 100 100     81450 if (not $spaces and substr($$yaml, 0, 1) eq "%") {
203 326         1759 $self->_fetch_next_tokens_directive($yaml, $next_line->[2]);
204 326         1040 $self->set_context(0);
205 326         816 $self->set_next_line(undef);
206 326         1165 return $next;
207             }
208 22620 100 100     152052 if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) {
    100 100        
209 4270         16645 $self->_push_tokens([ $TOKEN_NAMES{ $1 } => $1, $self->line ]);
210             }
211             elsif ($self->flowcontext and $$yaml =~ m/\A[ \t]+(#.*)?\z/) {
212 59         271 $self->_push_tokens([ EOL => join('', @$next_line), $self->line ]);
213 59         197 $self->set_next_line(undef);
214 59         207 return $next;
215             }
216             else {
217 18291         37178 $self->_push_tokens([ SPACE => $spaces, $self->line ]);
218             }
219              
220 22561         57106 my $partial = $self->_fetch_next_tokens($next_line);
221 22555 100       46771 unless ($partial) {
222 4687         9383 $self->set_next_line(undef);
223             }
224 22555         61898 return $next;
225             }
226              
227             my %ANCHOR_ALIAS_TAG = ( '&' => 1, '*' => 1, '!' => 1 );
228             my %BLOCK_SCALAR = ( '|' => 1, '>' => 1 );
229             my %COLON_DASH_QUESTION = ( ':' => 1, '-' => 1, '?' => 1 );
230             my %QUOTED = ( '"' => 1, "'" => 1 );
231             my %FLOW = ( '{' => 1, '[' => 1, '}' => 1, ']' => 1, ',' => 1 );
232             my %CONTEXT = ( '"' => 1, "'" => 1, '>' => 1, '|' => 1 );
233              
234             my $RE_ESCAPES = qr{(?:
235             \\([ \\\/_0abefnrtvLNP\t"]) | \\x([0-9a-fA-F]{2})
236             | \\u([A-Fa-f0-9]{4}) | \\U([A-Fa-f0-9]{4,8})
237             )}x;
238             my %CONTROL = (
239             '\\' => '\\', '/' => '/', n => "\n", t => "\t", r => "\r", b => "\b",
240             'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f", 'v' => "\x0b", "\t" => "\t",
241             'P' => "\x{2029}", L => "\x{2028}", 'N' => "\x85",
242             '0' => "\0", '_' => "\xa0", ' ' => ' ', q/"/ => q/"/,
243             );
244              
245             sub _fetch_next_tokens {
246 36370     36370   48523 TRACE and warn __PACKAGE__.':'.__LINE__.": _fetch_next_tokens\n";
247 36370         59208 my ($self, $next_line) = @_;
248              
249 36370         57571 my $yaml = \$next_line->[1];
250 36370         57117 my $eol = $next_line->[2];
251              
252 36370         49197 my @tokens;
253              
254 36370         46270 while (1) {
255 74492 100       158908 unless (length $$yaml) {
256 6628         12403 push @tokens, ( EOL => $eol, $self->line );
257 6628         16949 $self->_push_tokens(\@tokens);
258 6628         17659 return;
259             }
260 67864         116862 my $first = substr($$yaml, 0, 1);
261 67864         91982 my $plain = 0;
262              
263 67864 100       116419 if ($self->context) {
264 1781 100       9797 if ($$yaml =~ s/\A($RE_WS*)://) {
265 142 100       526 push @tokens, ( WS => $1, $self->line ) if $1;
266 142         376 push @tokens, ( COLON => ':', $self->line );
267 142         355 $self->set_context(0);
268 142         310 next;
269             }
270 1639 100       6427 if ($$yaml =~ s/\A($RE_WS*(?: #.*))\z//) {
271 9         61 push @tokens, ( EOL => $1 . $eol, $self->line );
272 9         40 $self->_push_tokens(\@tokens);
273 9         49 return;
274             }
275 1630         2948 $self->set_context(0);
276             }
277 67713 100 100     261690 if ($CONTEXT{ $first }) {
    100          
    100          
    100          
    100          
278 6347         11922 push @tokens, ( CONTEXT => $first, $self->line );
279 6347         16025 $self->_push_tokens(\@tokens);
280 6347         18595 return 1;
281             }
282             elsif ($COLON_DASH_QUESTION{ $first }) {
283 19543         31057 my $token_name = $TOKEN_NAMES{ $first };
284 19543 100 100     248223 if ($$yaml =~ s/\A\Q$first\E($RE_WS+|\z)//) {
    100          
285 19018         48434 my $after = $1;
286 19018 100 100     38130 if (not $self->flowcontext and not $self->block) {
287 8         46 push @tokens, ERROR => $first . $after, $self->line;
288 8         22 $self->_push_tokens(\@tokens);
289 8         24 $self->exception("Tabs can not be used for indentation");
290             }
291 19010 100       50549 if ($after =~ tr/\t//) {
292 80         315 $self->set_block(0);
293             }
294 19010         32246 my $token_name = $TOKEN_NAMES{ $first };
295 19010         36114 push @tokens, ( $token_name => $first, $self->line );
296 19010 50       48112 if (not defined $1) {
297 0         0 push @tokens, ( EOL => $eol, $self->line );
298 0         0 $self->_push_tokens(\@tokens);
299 0         0 return;
300             }
301 19010         29623 my $ws = $1;
302 19010 100       66090 if ($$yaml =~ s/\A(#.*|)\z//) {
303 2520         7976 push @tokens, ( EOL => $ws . $1 . $eol, $self->line );
304 2520         6496 $self->_push_tokens(\@tokens);
305 2520         9160 return;
306             }
307 16490         31983 push @tokens, ( WS => $ws, $self->line );
308 16490         42491 next;
309             }
310             elsif ($self->flowcontext and $$yaml =~ s/\A:(?=[,\{\}\[\]])//) {
311 7         37 push @tokens, ( $token_name => $first, $self->line );
312 7         18 next;
313             }
314 518         1157 $plain = 1;
315             }
316             elsif ($ANCHOR_ALIAS_TAG{ $first }) {
317 5369         9385 my $token_name = $TOKEN_NAMES{ $first };
318 5369         8866 my $REGEX = $REGEXES{ $token_name };
319 5369 50       156159 if ($$yaml =~ s/\A$REGEX//) {
320 5369         15747 push @tokens, ( $token_name => $1, $self->line );
321             }
322             else {
323 0         0 push @tokens, ( "Invalid $token_name" => $$yaml, $self->line );
324 0         0 $self->_push_tokens(\@tokens);
325 0         0 return;
326             }
327             }
328             elsif ($first eq ' ' or $first eq "\t") {
329 9094 50       53444 if ($$yaml =~ s/\A($RE_WS+)//) {
330 9094         19494 my $ws = $1;
331 9094 100       33173 if ($$yaml =~ s/\A((?:#.*)?\z)//) {
332 208         851 push @tokens, ( EOL => $ws . $1 . $eol, $self->line );
333 208         711 $self->_push_tokens(\@tokens);
334 208         750 return;
335             }
336 8886         19223 push @tokens, ( WS => $ws, $self->line );
337             }
338             }
339             elsif ($FLOW{ $first }) {
340 7228         16311 push @tokens, ( $TOKEN_NAMES{ $first } => $first, $self->line );
341 7228         19598 substr($$yaml, 0, 1, '');
342 7228         13944 my $flowcontext = $self->flowcontext;
343 7228 100 100     31643 if ($first eq '{' or $first eq '[') {
    100 100        
344 2471         5026 $self->set_flowcontext(++$flowcontext);
345             }
346             elsif ($first eq '}' or $first eq ']') {
347 2460         4923 $self->set_flowcontext(--$flowcontext);
348             }
349             }
350             else {
351 20132         32192 $plain = 1;
352             }
353              
354 42133 100       83893 if ($plain) {
355 20650         37074 push @tokens, ( CONTEXT => '', $self->line );
356 20650         49466 $self->_push_tokens(\@tokens);
357 20650         54128 return 1;
358             }
359              
360             }
361              
362 0         0 return;
363             }
364              
365             sub fetch_plain {
366 20611     20611 0 39967 my ($self, $indent, $context) = @_;
367 20611         38508 my $next_line = $self->next_line;
368 20611         34972 my $yaml = \$next_line->[1];
369 20611         31860 my $eol = $next_line->[2];
370 20611         29730 my $REGEX = $RE_PLAIN_WORDS;
371 20611 100       32503 if ($self->flowcontext) {
372 2631         4436 $REGEX = $RE_PLAIN_WORDS_FLOW;
373             }
374              
375 20611         29296 my @tokens;
376 20611 100       377812 unless ($$yaml =~ s/\A($REGEX(?:[:]+(?=\:(\s|\z)))?)//) {
377 7         33 $self->_push_tokens(\@tokens);
378 7         39 $self->exception("Invalid plain scalar");
379             }
380 20604         65283 my $plain = $1;
381 20604         46202 push @tokens, ( PLAIN => $plain, $self->line );
382              
383 20604 100       120608 if ($$yaml =~ s/\A(?:($RE_WS+#.*)|($RE_WS*))\z//) {
384 10023 100       25662 if (defined $1) {
385 130         678 push @tokens, ( EOL => $1 . $eol, $self->line );
386 130         429 $self->_push_tokens(\@tokens);
387 130         420 $self->set_next_line(undef);
388 130         588 return;
389             }
390             else {
391 9893         26567 push @tokens, ( EOL => $2. $eol, $self->line );
392 9893         21620 $self->set_next_line(undef);
393             }
394             }
395             else {
396 10581         31992 $self->_push_tokens(\@tokens);
397 10581         21966 my $partial = $self->_fetch_next_tokens($next_line);
398 10579 100       22171 if (not $partial) {
399 3067         6450 $self->set_next_line(undef);
400             }
401 10579         38779 return;
402             }
403              
404 9893         16836 my $RE2 = $RE_PLAIN_WORDS2;
405 9893 100       17081 if ($self->flowcontext) {
406 124         409 $RE2 = $RE_PLAIN_WORDS_FLOW2;
407             }
408 9893         14359 my $fetch_next = 0;
409 9893         20701 my @lines = ($plain);
410 9893         12789 my @next;
411 9893         13866 LOOP: while (1) {
412 10253         19445 $next_line = $self->_fetch_next_line;
413 10253 100       22529 if (not $next_line) {
414 3577         7354 last LOOP;
415             }
416 6676         10631 my $spaces = $next_line->[0];
417 6676         10078 my $yaml = \$next_line->[1];
418 6676         11032 my $eol = $next_line->[2];
419              
420 6676 100       14982 if (not length $$yaml) {
421 104         424 push @tokens, ( EOL => $spaces . $eol, $self->line );
422 104         309 $self->set_next_line(undef);
423 104         256 push @lines, '';
424 104         261 next LOOP;
425             }
426              
427 6572 100 100     36034 if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) {
428 624         2583 push @next, $TOKEN_NAMES{ $1 } => $1, $self->line;
429 624         1106 $fetch_next = 1;
430 624         1387 last LOOP;
431             }
432 5948 100       15754 if ((length $spaces) < $indent) {
433 5548         12068 last LOOP;
434             }
435              
436 400         913 my $ws = '';
437 400 100       2286 if ($$yaml =~ s/\A($RE_WS+)//) {
438 25         81 $ws = $1;
439             }
440 400 100       1078 if (not length $$yaml) {
441 11         47 push @tokens, ( EOL => $spaces . $ws . $eol, $self->line );
442 11         41 $self->set_next_line(undef);
443 11         27 push @lines, '';
444 11         33 next LOOP;
445             }
446 389 100       1329 if ($$yaml =~ s/\A(#.*)\z//) {
447 9         59 push @tokens, ( EOL => $spaces . $ws . $1 . $eol, $self->line );
448 9         47 $self->set_next_line(undef);
449 9         42 last LOOP;
450             }
451              
452 380 100       16494 if ($$yaml =~ s/\A($RE2)//) {
453 294         1011 push @tokens, INDENT => $spaces, $self->line;
454 294         748 push @tokens, WS => $ws, $self->line;
455 294         675 push @tokens, PLAIN => $1, $self->line;
456 294         654 push @lines, $1;
457 294         538 my $ws = '';
458 294 100       1498 if ($$yaml =~ s/\A($RE_WS+)//) {
459 41         126 $ws = $1;
460             }
461 294 100       927 if (not length $$yaml) {
462 245         732 push @tokens, EOL => $ws . $eol, $self->line;
463 245         656 $self->set_next_line(undef);
464 245         849 next LOOP;
465             }
466              
467 49 100       318 if ($$yaml =~ s/\A(#.*)\z//) {
468 17         92 push @tokens, EOL => $ws . $1 . $eol, $self->line;
469 17         73 $self->set_next_line(undef);
470 17         57 last LOOP;
471             }
472             else {
473 32 100       171 push @tokens, WS => $ws, $self->line if $ws;
474 32         96 $fetch_next = 1;
475             }
476             }
477             else {
478 86         398 push @tokens, SPACE => $spaces, $self->line;
479 86         278 push @tokens, WS => $ws, $self->line;
480 86 50       222 if ($self->flowcontext) {
481 86         214 $fetch_next = 1;
482             }
483             else {
484 0         0 push @tokens, ERROR => $$yaml, $self->line;
485             }
486             }
487              
488 118         444 last LOOP;
489              
490             }
491             # remove empty lines at the end
492 9893   100     28492 while (@lines > 1 and $lines[-1] eq '') {
493 90         321 pop @lines;
494             }
495 9893 100       19885 if (@lines > 1) {
496 239         1249 my $value = YAML::PP::Render->render_multi_val(\@lines);
497 239         437 my @eol;
498 239 100       677 if ($tokens[-3] eq 'EOL') {
499 207         529 @eol = splice @tokens, -3;
500             }
501 239         1191 $self->push_subtokens( { name => 'PLAIN_MULTI', value => $value }, \@tokens);
502 239         941 $self->_push_tokens([ @eol, @next ]);
503             }
504             else {
505 9654         32787 $self->_push_tokens([ @tokens, @next ]);
506             }
507 9893         25525 @tokens = ();
508 9893 100       19657 if ($fetch_next) {
509 742         1952 my $partial = $self->_fetch_next_tokens($next_line);
510 742 100       2127 if (not $partial) {
511 601         1585 $self->set_next_line(undef);
512             }
513             }
514 9893         37455 return;
515             }
516              
517             sub fetch_block {
518 1718     1718 0 3959 my ($self, $indent, $context) = @_;
519 1718         4072 my $next_line = $self->next_line;
520 1718         3382 my $yaml = \$next_line->[1];
521 1718         2877 my $eol = $next_line->[2];
522              
523 1718         2576 my @tokens;
524 1718         3092 my $token_name = $TOKEN_NAMES{ $context };
525 1718 50       18874 $$yaml =~ s/\A\Q$context\E// or die "Unexpected";
526 1718         5767 push @tokens, ( $token_name => $context, $self->line );
527 1718         3084 my $current_indent = $indent;
528 1718         2651 my $started = 0;
529 1718         2432 my $set_indent = 0;
530 1718         2832 my $chomp = '';
531 1718 100       8146 if ($$yaml =~ s/\A([1-9])([+-]?)//) {
    100          
532 215         633 push @tokens, ( BLOCK_SCALAR_INDENT => $1, $self->line );
533 215         551 $set_indent = $1;
534 215 100       685 $chomp = $2 if $2;
535 215 100       670 push @tokens, ( BLOCK_SCALAR_CHOMP => $2, $self->line ) if $2;
536             }
537             elsif ($$yaml =~ s/\A([+-])([1-9])?//) {
538 409         1188 push @tokens, ( BLOCK_SCALAR_CHOMP => $1, $self->line );
539 409         827 $chomp = $1;
540 409 100       1184 push @tokens, ( BLOCK_SCALAR_INDENT => $2, $self->line ) if $2;
541 409 100       1020 $set_indent = $2 if $2;
542             }
543 1718 100       3979 if ($set_indent) {
544 224         414 $started = 1;
545 224 100       596 $indent-- if $indent > 0;
546 224         477 $current_indent = $indent + $set_indent;
547             }
548 1718 100       4761 if (not length $$yaml) {
    100          
549 1660         3622 push @tokens, ( EOL => $eol, $self->line );
550             }
551             elsif ($$yaml =~ s/\A($RE_WS*(?:$RE_WS#.*|))\z//) {
552 52         228 push @tokens, ( EOL => $1 . $eol, $self->line );
553             }
554             else {
555 6         24 $self->_push_tokens(\@tokens);
556 6         32 $self->exception("Invalid block scalar");
557             }
558              
559 1712         2727 my @lines;
560 1712         2677 while (1) {
561 5248         11707 $self->set_next_line(undef);
562 5248         8854 $next_line = $self->_fetch_next_line;
563 5248 100       10952 if (not $next_line) {
564 812         1324 last;
565             }
566 4436         7164 my $spaces = $next_line->[0];
567 4436         6396 my $content = $next_line->[1];
568 4436         6110 my $eol = $next_line->[2];
569 4436 100 100     16985 if (not $spaces and $content =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) {
570 200         505 last;
571             }
572 4236 100       9964 if ((length $spaces) < $current_indent) {
573 1434 100       3167 if (length $content) {
574 700 100       2090 if ($content =~ m/\A\t/) {
575 2         9 $self->_push_tokens(\@tokens);
576 2         6 $self->exception("Invalid block scalar");
577             }
578 698         1562 last;
579             }
580             else {
581 734         1401 push @lines, '';
582 734         1758 push @tokens, ( EOL => $spaces . $eol, $self->line );
583 734         1574 next;
584             }
585             }
586 2802 100       5717 if ((length $spaces) > $current_indent) {
587 1568 100       3325 if ($started) {
588 370         2284 ($spaces, my $more_spaces) = unpack "a${current_indent}a*", $spaces;
589 370         941 $content = $more_spaces . $content;
590             }
591             }
592 2802 100       6018 unless (length $content) {
593 257         593 push @lines, '';
594 257         663 push @tokens, ( INDENT => $spaces, $self->line, EOL => $eol, $self->line );
595 257 100       634 unless ($started) {
596 131         212 $current_indent = length $spaces;
597             }
598 257         514 next;
599             }
600 2545 100       5006 unless ($started) {
601 1380         1998 $started = 1;
602 1380         2024 $current_indent = length $spaces;
603             }
604 2545         4896 push @lines, $content;
605 2545         5384 push @tokens, (
606             INDENT => $spaces, $self->line,
607             BLOCK_SCALAR_CONTENT => $content, $self->line,
608             EOL => $eol, $self->line,
609             );
610             }
611 1710         6756 my $value = YAML::PP::Render->render_block_scalar($context, $chomp, \@lines);
612 1710         4647 my @eol = splice @tokens, -3;
613 1710         7744 $self->push_subtokens( { name => 'BLOCK_SCALAR', value => $value }, \@tokens );
614 1710         6415 $self->_push_tokens([ @eol ]);
615 1710         9122 return 0;
616             }
617              
618             sub fetch_quoted {
619 4629     4629 0 9279 my ($self, $indent, $context) = @_;
620 4629         9113 my $next_line = $self->next_line;
621 4629         8424 my $yaml = \$next_line->[1];
622 4629         7690 my $spaces = $next_line->[0];
623              
624 4629         8214 my $token_name = $TOKEN_NAMES{ $context };
625 4629 50       43892 $$yaml =~ s/\A\Q$context// or die "Unexpected";;
626 4629         14704 my @tokens = ( $token_name => $context, $self->line );
627              
628 4629         7443 my $start = 1;
629 4629         6423 my @values;
630 4629         6640 while (1) {
631              
632 5477 100       11364 unless ($start) {
633 848 100       1674 $next_line = $self->_fetch_next_line or do {
634 1         15 for (my $i = 0; $i < @tokens; $i+= 3) {
635 3         8 my $token = $tokens[ $i + 1 ];
636 3 100       10 if (ref $token) {
637 1         4 $tokens[ $i + 1 ] = $token->{orig};
638             }
639             }
640 1         15 $self->_push_tokens(\@tokens);
641 1         9 $self->exception("Missing closing quote <$context> at EOF");
642             };
643 847         1421 $start = 0;
644 847         1647 $spaces = $next_line->[0];
645 847         1703 $yaml = \$next_line->[1];
646              
647 847 100 100     4799 if (not length $$yaml) {
    100          
    100          
648 160         731 push @tokens, ( EOL => $spaces . $next_line->[2], $self->line );
649 160         451 $self->set_next_line(undef);
650 160         487 push @values, { value => '', orig => '' };
651 160         294 next;
652             }
653             elsif (not $spaces and $$yaml =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) {
654 3         14 for (my $i = 0; $i < @tokens; $i+= 3) {
655 9         17 my $token = $tokens[ $i + 1 ];
656 9 100       24 if (ref $token) {
657 3         15 $tokens[ $i + 1 ] = $token->{orig};
658             }
659             }
660 3         10 $self->_push_tokens(\@tokens);
661 3         17 $self->exception("Missing closing quote <$context> or invalid document marker");
662             }
663             elsif ((length $spaces) < $indent) {
664 3         12 for (my $i = 0; $i < @tokens; $i+= 3) {
665 9         16 my $token = $tokens[ $i + 1 ];
666 9 100       27 if (ref $token) {
667 3         11 $tokens[ $i + 1 ] = $token->{orig};
668             }
669             }
670 3         9 $self->_push_tokens(\@tokens);
671 3         16 $self->exception("Wrong indendation or missing closing quote <$context>");
672             }
673              
674 681 100       3691 if ($$yaml =~ s/\A($RE_WS+)//) {
675 95         278 $spaces .= $1;
676             }
677 681         1659 push @tokens, ( WS => $spaces, $self->line );
678             }
679              
680 5310         12589 my $v = $self->_read_quoted_tokens($start, $context, $yaml, \@tokens);
681 5308         8800 push @values, $v;
682 5308 100       12002 if ($tokens[-3] eq $token_name) {
683 4620 100       8195 if ($start) {
684             $self->push_subtokens(
685             { name => 'QUOTED', value => $v->{value} }, \@tokens
686 4131         15057 );
687             }
688             else {
689 489         2354 my $value = YAML::PP::Render->render_quoted($context, \@values);
690 489         2164 $self->push_subtokens(
691             { name => 'QUOTED_MULTILINE', value => $value }, \@tokens
692             );
693             }
694 4620 100       9429 $self->set_context(1) if $self->flowcontext;
695 4620 100       10540 if (length $$yaml) {
696 2486         5296 my $partial = $self->_fetch_next_tokens($next_line);
697 2486 100       5463 if (not $partial) {
698 1010         2176 $self->set_next_line(undef);
699             }
700 2486         13264 return 0;
701             }
702             else {
703 2134         5165 @tokens = ();
704 2134         4796 push @tokens, ( EOL => $next_line->[2], $self->line );
705 2134         5721 $self->_push_tokens(\@tokens);
706 2134         5632 $self->set_next_line(undef);
707 2134         11189 return;
708             }
709             }
710 688         1339 $tokens[-2] .= $next_line->[2];
711 688         1778 $self->set_next_line(undef);
712 688         1499 $start = 0;
713             }
714             }
715              
716             sub _read_quoted_tokens {
717 5310     5310   11112 my ($self, $start, $first, $yaml, $tokens) = @_;
718 5310         7928 my $quoted = '';
719 5310         7896 my $decoded = '';
720 5310         8888 my $token_name = $TOKEN_NAMES{ $first };
721 5310         8603 my $eol = '';
722 5310 100       10534 if ($first eq "'") {
723 2145         3627 my $regex = $REGEXES{SINGLEQUOTED};
724 2145 50       16281 if ($$yaml =~ s/\A($regex)//) {
725 2145         6247 $quoted .= $1;
726 2145         3357 $decoded .= $1;
727 2145         4473 $decoded =~ s/''/'/g;
728             }
729 2145 100       6124 unless (length $$yaml) {
730 88 100       729 if ($quoted =~ s/($RE_WS+)\z//) {
731 24         74 $eol = $1;
732 24         336 $decoded =~ s/($eol)\z//;
733             }
734             }
735             }
736             else {
737 3165         6730 ($quoted, $decoded, $eol) = $self->_read_doublequoted($yaml);
738             }
739 5310         16966 my $value = { value => $decoded, orig => $quoted };
740              
741 5310 100       31975 if ($$yaml =~ s/\A$first//) {
742 4620 100       9970 if ($start) {
743 4131         11018 push @$tokens, ( $token_name . 'D' => $value, $self->line );
744             }
745             else {
746 489         1736 push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line );
747             }
748 4620         9334 push @$tokens, ( $token_name => $first, $self->line );
749 4620         12720 return $value;
750             }
751 690 100       2141 if (length $$yaml) {
752 2         11 push @$tokens, ( $token_name . 'D' => $value->{orig}, $self->line );
753 2         8 $self->_push_tokens($tokens);
754 2         11 $self->exception("Invalid quoted <$first> string");
755             }
756              
757 688         2282 push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line );
758 688         1634 push @$tokens, ( EOL => $eol, $self->line );
759              
760 688         1628 return $value;
761             }
762              
763             sub _read_doublequoted {
764 3165     3165   6460 my ($self, $yaml) = @_;
765 3165         4614 my $quoted = '';
766 3165         4554 my $decoded = '';
767 3165         4518 my $eol = '';
768 3165         4238 while (1) {
769 9271         12191 my $last = 1;
770 9271 100       31699 if ($$yaml =~ s/\A([^"\\ \t]+)//) {
771 5526         11655 $quoted .= $1;
772 5526         8345 $decoded .= $1;
773 5526         7485 $last = 0;
774             }
775 9271 100       38404 if ($$yaml =~ s/\A($RE_ESCAPES)//) {
776 1475         3183 $quoted .= $1;
777 1475 100       5217 my $dec = defined $2 ? $CONTROL{ $2 }
    100          
    100          
778             : defined $3 ? chr hex $3
779             : defined $4 ? chr hex $4
780             : chr hex $5;
781 1475         2572 $decoded .= $dec;
782 1475         2458 $last = 0;
783             }
784 9271 100       26625 if ($$yaml =~ s/\A([ \t]+)//) {
785 2742         5219 my $spaces = $1;
786 2742 100       5976 if (length $$yaml) {
787 2601         3915 $quoted .= $spaces;
788 2601         3416 $decoded .= $spaces;
789 2601         3918 $last = 0;
790             }
791             else {
792 141         293 $eol = $spaces;
793 141         351 last;
794             }
795             }
796 9130 100       19311 if ($$yaml =~ s/\A(\\)\z//) {
797 82         184 $quoted .= $1;
798 82         151 $decoded .= $1;
799 82         184 last;
800             }
801 9048 100       19305 last if $last;
802             }
803 3165         10860 return ($quoted, $decoded, $eol);
804             }
805              
806             sub _fetch_next_tokens_directive {
807 326     326   939 my ($self, $yaml, $eol) = @_;
808 326         536 my @tokens;
809              
810 326         558 my $trailing_ws = '';
811 326   100     1275 my $warn = $ENV{YAML_PP_RESERVED_DIRECTIVE} || 'warn';
812 326 100       5628 if ($$yaml =~ s/\A(\s*%YAML[ \t]+([0-9]+\.[0-9]+))//) {
    100          
    50          
813 163         404 my $dir = $1;
814 163         334 my $version = $2;
815 163 100       1405 if ($$yaml =~ s/\A($RE_WS+)//) {
    100          
816 28         102 $trailing_ws = $1;
817             }
818             elsif (length $$yaml) {
819 1         13 push @tokens, ( 'Invalid directive' => $dir.$$yaml.$eol, $self->line );
820 1         6 $self->_push_tokens(\@tokens);
821 1         4 return;
822             }
823 162 100       733 if ($version !~ m/^1\.[12]$/) {
824 9 50       67 if ($warn eq 'warn') {
    50          
825 0         0 warn "Unsupported YAML version '$dir'";
826             }
827             elsif ($warn eq 'fatal') {
828 0         0 push @tokens, ( 'Unsupported YAML version' => $dir, $self->line );
829 0         0 $self->_push_tokens(\@tokens);
830 0         0 return;
831             }
832             }
833 162         514 push @tokens, ( YAML_DIRECTIVE => $dir, $self->line );
834             }
835             elsif ($$yaml =~ s/\A(\s*%TAG[ \t]+(!$RE_NS_WORD_CHAR*!|!)[ \t]+(tag:\S+|!$RE_URI_CHAR+))($RE_WS*)//) {
836 118         465 push @tokens, ( TAG_DIRECTIVE => $1, $self->line );
837             # TODO
838 118         315 my $tag_alias = $2;
839 118         261 my $tag_url = $3;
840 118         448 $trailing_ws = $4;
841             }
842             elsif ($$yaml =~ s/\A(\s*\A%(?:\w+).*)//) {
843 45         207 push @tokens, ( RESERVED_DIRECTIVE => $1, $self->line );
844 45 50       259 if ($warn eq 'warn') {
    50          
845 0         0 warn "Found reserved directive '$1'";
846             }
847             elsif ($warn eq 'fatal') {
848 0         0 die "Found reserved directive '$1'";
849             }
850             }
851             else {
852 0         0 push @tokens, ( 'Invalid directive' => $$yaml, $self->line );
853 0         0 push @tokens, ( EOL => $eol, $self->line );
854 0         0 $self->_push_tokens(\@tokens);
855 0         0 return;
856             }
857 325 100 66     1430 if (not length $$yaml) {
    100          
    50          
858 297         767 push @tokens, ( EOL => $eol, $self->line );
859             }
860             elsif ($trailing_ws and $$yaml =~ s/\A(#.*)?\z//) {
861 27         170 push @tokens, ( EOL => "$trailing_ws$1$eol", $self->line );
862 27         132 $self->_push_tokens(\@tokens);
863 27         75 return;
864             }
865             elsif ($$yaml =~ s/\A([ \t]+#.*)?\z//) {
866 0         0 push @tokens, ( EOL => "$1$eol", $self->line );
867 0         0 $self->_push_tokens(\@tokens);
868 0         0 return;
869             }
870             else {
871 1         5 push @tokens, ( 'Invalid directive' => $trailing_ws.$$yaml, $self->line );
872 1         5 push @tokens, ( EOL => $eol, $self->line );
873             }
874 298         1008 $self->_push_tokens(\@tokens);
875 298         850 return;
876             }
877              
878             sub _push_tokens {
879 84491     84491   146924 my ($self, $new_tokens) = @_;
880 84491         137394 my $next = $self->next_tokens;
881 84491         138726 my $line = $self->line;
882 84491         144343 my $column = $self->offset;
883              
884 84491         191406 for (my $i = 0; $i < @$new_tokens; $i += 3) {
885 152611         244510 my $value = $new_tokens->[ $i + 1 ];
886 152611         208467 my $name = $new_tokens->[ $i ];
887 152611         203219 my $line = $new_tokens->[ $i + 2 ];
888 152611         449753 my $push = {
889             name => $name,
890             line => $line,
891             column => $column,
892             value => $value,
893             };
894 152611 100       337829 $column += length $value unless $name eq 'CONTEXT';
895 152611         232264 push @$next, $push;
896 152611 100       392211 if ($name eq 'EOL') {
897 24386         59392 $column = 0;
898             }
899             }
900 84491         192789 $self->set_offset($column);
901 84491         128371 return $next;
902             }
903              
904             sub push_subtokens {
905 6569     6569 0 12439 my ($self, $token, $subtokens) = @_;
906 6569         11856 my $next = $self->next_tokens;
907 6569         11906 my $line = $self->line;
908 6569         12282 my $column = $self->offset;
909 6569         12267 $token->{column} = $column;
910 6569         12228 $token->{subtokens} = \my @sub;
911              
912 6569         16822 for (my $i = 0; $i < @$subtokens; $i+=3) {
913 28830         42297 my $name = $subtokens->[ $i ];
914 28830         45114 my $value = $subtokens->[ $i + 1 ];
915 28830         37783 my $line = $subtokens->[ $i + 2 ];
916 28830         66653 my $push = {
917             name => $subtokens->[ $i ],
918             line => $line,
919             column => $column,
920             };
921 28830 100       52374 if (ref $value eq 'HASH') {
922 5301         32095 %$push = ( %$push, %$value );
923 5301         14165 $column += length $value->{orig};
924             }
925             else {
926 23529         40179 $push->{value} = $value;
927 23529         34084 $column += length $value;
928             }
929 28830 100       52228 if ($push->{name} eq 'EOL') {
930 4705         6877 $column = 0;
931             }
932 28830         68845 push @sub, $push;
933             }
934 6569         12321 $token->{line} = $sub[0]->{line};
935 6569         10974 push @$next, $token;
936 6569         16711 $self->set_offset($column);
937 6569         12122 return $next;
938             }
939              
940             sub exception {
941 61     61 0 132 my ($self, $msg) = @_;
942 61         121 my $next = $self->next_tokens;
943 61         104 $next = [];
944 61 50       174 my $line = @$next ? $next->[0]->{line} : $self->line;
945 61         448 my @caller = caller(0);
946 61         133 my $yaml = '';
947 61 100       119 if (my $nl = $self->next_line) {
948 60         149 $yaml = join '', @$nl;
949 60         130 $yaml = $nl->[1];
950             }
951 61         137 my $e = YAML::PP::Exception->new(
952             line => $line,
953             column => $self->offset + 1,
954             msg => $msg,
955             next => $next,
956             where => $caller[1] . ' line ' . $caller[2],
957             yaml => $yaml,
958             );
959 61         1631 croak $e;
960             }
961              
962             1;