File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 414 439 94.3
branch 246 284 86.6
condition 147 189 77.7
subroutine 28 28 100.0
pod 5 6 83.3
total 840 946 88.7


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C object as needed.
36              
37             All methods do a one-shot "lex this and give me a L object".
38              
39             In fact, if you are reading this, what you B want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 64     64   398 use strict;
  64         128  
  64         1579  
57 64     64   320 use Scalar::Util ();
  64         103  
  64         1023  
58 64     64   248 use Params::Util qw{_STRING _INSTANCE};
  64         105  
  64         2319  
59 64     64   306 use PPI ();
  64         107  
  64         663  
60 64     64   269 use PPI::Exception ();
  64         137  
  64         1163  
61 64     64   350 use PPI::Singletons '%_PARENT';
  64         144  
  64         274707  
62              
63             our $VERSION = '1.276';
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16709     16709 0 51024 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C constructor creates a new C object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -Elex_xxxxx calls.
106              
107             Returns a new C object
108              
109             =cut
110              
111             sub new {
112 16710     16710 1 28752 my $class = shift->_clear;
113 16710         60614 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             }, $class;
118             }
119              
120              
121              
122              
123              
124             #####################################################################
125             # Main Lexing Methods
126              
127             =pod
128              
129             =head2 lex_file $filename
130              
131             The C method takes a filename as argument. It then loads the file,
132             creates a L for the content and lexes the token stream
133             produced by the tokenizer. Basically, a sort of all-in-one method for
134             getting a L object from a file name.
135              
136             Returns a L object, or C on error.
137              
138             =cut
139              
140             sub lex_file {
141 497 100   497 1 1648 my $self = ref $_[0] ? shift : shift->new;
142 497         1574 my $file = _STRING(shift);
143 497 100       1192 unless ( defined $file ) {
144 1         4 return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
145             }
146              
147             # Create the Tokenizer
148 496         842 my $Tokenizer = eval {
149 496         1194 X_TOKENIZER->new($file);
150             };
151 496 50       2374 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
152 0         0 return $self->_error( $@->message );
153             } elsif ( $@ ) {
154 0         0 return $self->_error( $errstr );
155             }
156              
157 496         1861 $self->lex_tokenizer( $Tokenizer );
158             }
159              
160             =pod
161              
162             =head2 lex_source $string
163              
164             The C method takes a normal scalar string as argument. It
165             creates a L object for the string, and then lexes the
166             resulting token stream.
167              
168             Returns a L object, or C on error.
169              
170             =cut
171              
172             sub lex_source {
173 16213 50   16213 1 242026 my $self = ref $_[0] ? shift : shift->new;
174 16213         23563 my $source = shift;
175 16213 50 33     55927 unless ( defined $source and not ref $source ) {
176 0         0 return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
177             }
178              
179             # Create the Tokenizer and hand off to the next method
180 16213         19656 my $Tokenizer = eval {
181 16213         25610 X_TOKENIZER->new(\$source);
182             };
183 16213 50       48979 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
184 0         0 return $self->_error( $@->message );
185             } elsif ( $@ ) {
186 0         0 return $self->_error( $errstr );
187             }
188              
189 16213         29363 $self->lex_tokenizer( $Tokenizer );
190             }
191              
192             =pod
193              
194             =head2 lex_tokenizer $Tokenizer
195              
196             The C takes as argument a L object. It
197             lexes the token stream from the tokenizer into a L object.
198              
199             Returns a L object, or C on error.
200              
201             =cut
202              
203             sub lex_tokenizer {
204 16709 50   16709 1 30715 my $self = ref $_[0] ? shift : shift->new;
205 16709         69701 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
206 16709 50       33244 return $self->_error(
207             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
208             ) unless $Tokenizer;
209              
210             # Create the empty document
211 16709         38452 my $Document = PPI::Document->new;
212              
213             # Lex the token stream into the document
214 16709         22226 $self->{Tokenizer} = $Tokenizer;
215 16709 100       19486 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16709         35065  
  16708         27512  
216             # If an error occurs DESTROY the partially built document.
217 1         4 undef $Document;
218 1 50       6 if ( _INSTANCE($@, 'PPI::Exception') ) {
219 1         4 return $self->_error( $@->message );
220             } else {
221 0         0 return $self->_error( $errstr );
222             }
223             }
224              
225 16708         90749 return $Document;
226             }
227              
228              
229              
230              
231              
232             #####################################################################
233             # Lex Methods - Document Object
234              
235             sub _lex_document {
236 16709     16709   25811 my ($self, $Document) = @_;
237             # my $self = shift;
238             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
239              
240             # Start the processing loop
241 16709         18019 my $Token;
242 16709         29914 while ( ref($Token = $self->_get_token) ) {
243             # Add insignificant tokens directly beneath us
244 52485 100       115772 unless ( $Token->significant ) {
245 20323         40409 $self->_add_element( $Document, $Token );
246 20323         31015 next;
247             }
248              
249 32162 100       61169 if ( $Token->content eq ';' ) {
250             # It's a semi-colon on its own.
251             # We call this a null statement.
252 451         1462 $self->_add_element(
253             $Document,
254             PPI::Statement::Null->new($Token),
255             );
256 451         916 next;
257             }
258              
259             # Handle anything other than a structural element
260 31711 100       63243 unless ( ref $Token eq 'PPI::Token::Structure' ) {
261             # Determine the class for the Statement, and create it
262 28615         58581 my $Statement = $self->_statement($Document, $Token)->new($Token);
263              
264             # Move the lexing down into the statement
265 28615         65051 $self->_add_delayed( $Document );
266 28615         57478 $self->_add_element( $Document, $Statement );
267 28615         55328 $self->_lex_statement( $Statement );
268              
269 28615         55009 next;
270             }
271              
272             # Is this the opening of a structure?
273 3096 100       5748 if ( $Token->__LEXER__opens ) {
274             # This should actually have a Statement instead
275 985         2662 $self->_rollback( $Token );
276 985         2525 my $Statement = PPI::Statement->new;
277 985         2172 $self->_add_element( $Document, $Statement );
278 985         2140 $self->_lex_statement( $Statement );
279 985         1891 next;
280             }
281              
282             # Is this the close of a structure.
283 2111 50       4183 if ( $Token->__LEXER__closes ) {
284             # Because we are at the top of the tree, this is an error.
285             # This means either a mis-parsing, or a mistake in the code.
286             # To handle this, we create a "Naked Close" statement
287 2111         5659 $self->_add_element( $Document,
288             PPI::Statement::UnmatchedBrace->new($Token)
289             );
290 2111         3750 next;
291             }
292              
293             # Shouldn't be able to get here
294 0         0 PPI::Exception->throw('Lexer reached an illegal state');
295             }
296              
297             # Did we leave the main loop because of a Tokenizer error?
298 16708 50       27463 unless ( defined $Token ) {
299 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
300 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
301 0         0 PPI::Exception->throw($errstr);
302             }
303              
304             # No error, it's just the end of file.
305             # Add any insignificant trailing tokens.
306 16708         32410 $self->_add_delayed( $Document );
307              
308             # If the Tokenizer has any v6 blocks to attach, do so now.
309             # Checking once at the end is faster than adding a special
310             # case check for every statement parsed.
311 16708         23713 my $perl6 = $self->{Tokenizer}->{'perl6'};
312 16708 100       27398 if ( @$perl6 ) {
313 2         8 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
314 2         5 foreach my $include ( @$includes ) {
315 2 50       3 unless ( @$perl6 ) {
316 0         0 PPI::Exception->throw('Failed to find a perl6 section');
317             }
318 2         7 $include->{perl6} = shift @$perl6;
319             }
320             }
321              
322 16708         21491 return 1;
323             }
324              
325              
326              
327              
328              
329             #####################################################################
330             # Lex Methods - Statement Object
331              
332             # Keyword -> Statement Subclass
333             my %STATEMENT_CLASSES = (
334             # Things that affect the timing of execution
335             'BEGIN' => 'PPI::Statement::Scheduled',
336             'CHECK' => 'PPI::Statement::Scheduled',
337             'UNITCHECK' => 'PPI::Statement::Scheduled',
338             'INIT' => 'PPI::Statement::Scheduled',
339             'END' => 'PPI::Statement::Scheduled',
340              
341             # Special subroutines for which 'sub' is optional
342             'AUTOLOAD' => 'PPI::Statement::Sub',
343             'DESTROY' => 'PPI::Statement::Sub',
344              
345             # Loading and context statement
346             'package' => 'PPI::Statement::Package',
347             # 'use' => 'PPI::Statement::Include',
348             'no' => 'PPI::Statement::Include',
349             'require' => 'PPI::Statement::Include',
350              
351             # Various declarations
352             'my' => 'PPI::Statement::Variable',
353             'local' => 'PPI::Statement::Variable',
354             'our' => 'PPI::Statement::Variable',
355             'state' => 'PPI::Statement::Variable',
356             # Statements starting with 'sub' could be any one of...
357             # 'sub' => 'PPI::Statement::Sub',
358             # 'sub' => 'PPI::Statement::Scheduled',
359             # 'sub' => 'PPI::Statement',
360              
361             # Compound statement
362             'if' => 'PPI::Statement::Compound',
363             'unless' => 'PPI::Statement::Compound',
364             'for' => 'PPI::Statement::Compound',
365             'foreach' => 'PPI::Statement::Compound',
366             'while' => 'PPI::Statement::Compound',
367             'until' => 'PPI::Statement::Compound',
368              
369             # Switch statement
370             'given' => 'PPI::Statement::Given',
371             'when' => 'PPI::Statement::When',
372             'default' => 'PPI::Statement::When',
373              
374             # Various ways of breaking out of scope
375             'redo' => 'PPI::Statement::Break',
376             'next' => 'PPI::Statement::Break',
377             'last' => 'PPI::Statement::Break',
378             'return' => 'PPI::Statement::Break',
379             'goto' => 'PPI::Statement::Break',
380              
381             # Special sections of the file
382             '__DATA__' => 'PPI::Statement::Data',
383             '__END__' => 'PPI::Statement::End',
384             );
385              
386             sub _statement {
387 54475     54475   80346 my ($self, $Parent, $Token) = @_;
388             # my $self = shift;
389             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
390             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
391              
392             # Check for things like ( parent => ... )
393 54475 100 100     274434 if (
394             $Parent->isa('PPI::Structure::List')
395             or
396             $Parent->isa('PPI::Structure::Constructor')
397             ) {
398 7897 100       22736 if ( $Token->isa('PPI::Token::Word') ) {
399             # Is the next significant token a =>
400             # Read ahead to the next significant token
401 1965         2460 my $Next;
402 1965         3464 while ( $Next = $self->_get_token ) {
403 2725 100       6159 unless ( $Next->significant ) {
404 809         1138 push @{$self->{delayed}}, $Next;
  809         1435  
405             # $self->_delay_element( $Next );
406 809         1291 next;
407             }
408              
409             # Got the next token
410 1916 100 100     7142 if (
411             $Next->isa('PPI::Token::Operator')
412             and
413             $Next->content eq '=>'
414             ) {
415             # Is an ordinary expression
416 888         1912 $self->_rollback( $Next );
417 888         3259 return 'PPI::Statement::Expression';
418             } else {
419 1028         1500 last;
420             }
421             }
422              
423             # Rollback and continue
424 1077         1960 $self->_rollback( $Next );
425             }
426             }
427              
428 53587         66819 my $is_lexsub = 0;
429              
430             # Is it a token in our known classes list
431 53587         102100 my $class = $STATEMENT_CLASSES{$Token->content};
432 53587 100       92676 if ( $class ) {
433             # Is the next significant token a =>
434             # Read ahead to the next significant token
435 9645         11105 my $Next;
436 9645         15919 while ( $Next = $self->_get_token ) {
437 18926 100       37918 if ( !$Next->significant ) {
438 9328         10037 push @{$self->{delayed}}, $Next;
  9328         15459  
439 9328         14506 next;
440             }
441              
442             # Scheduled block must be followed by left curly or
443             # semicolon. Otherwise we have something else (e.g.
444             # open( CHECK, ... );
445 9598 100 66     19369 if (
      100        
446             'PPI::Statement::Scheduled' eq $class
447             and not ( $Next->isa( 'PPI::Token::Structure' )
448             and $Next->content =~ m/\A[{;]\z/ ) # }
449             ) {
450 1         2 $class = undef;
451 1         1 last;
452             }
453              
454             # Lexical subroutine
455 9597 100 100     15494 if (
      66        
456             $Token->content =~ /^(?:my|our|state)$/
457             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
458             ) {
459             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
460 7         9 $class = undef;
461 7         8 $is_lexsub = 1;
462 7         8 last;
463             }
464              
465             last if
466 9590 100 100     37208 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
467              
468             # Got the next token
469             # Is an ordinary expression
470 21         45 $self->_rollback( $Next );
471 21         70 return 'PPI::Statement';
472             }
473              
474             # Rollback and continue
475 9624         16460 $self->_rollback( $Next );
476             }
477              
478             # Handle potential barewords for subscripts
479 53566 100       132624 if ( $Parent->isa('PPI::Structure::Subscript') ) {
480             # Fast obvious case, just an expression
481 3852 100 100     7974 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
482 3729         10960 return 'PPI::Statement::Expression';
483             }
484              
485             # This is something like "my" or "our" etc... more subtle.
486             # Check if the next token is a closing curly brace.
487             # This means we are something like $h{my}
488 123         148 my $Next;
489 123         190 while ( $Next = $self->_get_token ) {
490 119 50       256 unless ( $Next->significant ) {
491 0         0 push @{$self->{delayed}}, $Next;
  0         0  
492             # $self->_delay_element( $Next );
493 0         0 next;
494             }
495              
496             # Found the next significant token.
497             # Is it a closing curly brace?
498 119 50       226 if ( $Next->content eq '}' ) {
499 119         206 $self->_rollback( $Next );
500 119         409 return 'PPI::Statement::Expression';
501             } else {
502 0         0 $self->_rollback( $Next );
503 0         0 return $class;
504             }
505             }
506              
507             # End of file... this means it is something like $h{our
508             # which is probably going to be $h{our} ... I think
509 4         12 $self->_rollback( $Next );
510 4         12 return 'PPI::Statement::Expression';
511             }
512              
513             # If it's a token in our list, use that class
514 49714 100       99731 return $class if $class;
515              
516             # Handle the more in-depth sub detection
517 40251 100 100     88787 if ( $is_lexsub || $Token->content eq 'sub' ) {
518             # Read ahead to the next significant token
519 3303         4334 my $Next;
520 3303         5703 while ( $Next = $self->_get_token ) {
521 6547 100       13538 unless ( $Next->significant ) {
522 3268         3504 push @{$self->{delayed}}, $Next;
  3268         5487  
523             # $self->_delay_element( $Next );
524 3268         5491 next;
525             }
526              
527             # Got the next significant token
528 3279         5942 my $sclass = $STATEMENT_CLASSES{$Next->content};
529 3279 100 100     7183 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
530 28         269 $self->_rollback( $Next );
531 28         110 return 'PPI::Statement::Scheduled';
532             }
533 3251 100       8060 if ( $Next->isa('PPI::Token::Word') ) {
534 3122         6392 $self->_rollback( $Next );
535 3122         11777 return 'PPI::Statement::Sub';
536             }
537              
538             ### Comment out these two, as they would return PPI::Statement anyway
539             # if ( $content eq '{' ) {
540             # Anonymous sub at start of statement
541             # return 'PPI::Statement';
542             # }
543             #
544             # if ( $Next->isa('PPI::Token::Prototype') ) {
545             # Anonymous sub at start of statement
546             # return 'PPI::Statement';
547             # }
548              
549             # PPI::Statement is the safest fall-through
550 129         295 $self->_rollback( $Next );
551 129         476 return 'PPI::Statement';
552             }
553              
554             # End of file... PPI::Statement::Sub is the most likely
555 24         67 $self->_rollback( $Next );
556 24         120 return 'PPI::Statement::Sub';
557             }
558              
559 36948 100       63648 if ( $Token->content eq 'use' ) {
560             # Add a special case for "use v6" lines.
561 2188         2474 my $Next;
562 2188         3701 while ( $Next = $self->_get_token ) {
563 4371 100       8821 unless ( $Next->significant ) {
564 2185         2404 push @{$self->{delayed}}, $Next;
  2185         3586  
565             # $self->_delay_element( $Next );
566 2185         3429 next;
567             }
568              
569             # Found the next significant token.
570 2186 100 66     9387 if (
    100          
571             $Next->isa('PPI::Token::Operator')
572             and
573             $Next->content eq '=>'
574             ) {
575             # Is an ordinary expression
576 1         4 $self->_rollback( $Next );
577 1         4 return 'PPI::Statement';
578             # Is it a v6 use?
579             } elsif ( $Next->content eq 'v6' ) {
580 2         6 $self->_rollback( $Next );
581 2         17 return 'PPI::Statement::Include::Perl6';
582             } else {
583 2183         4547 $self->_rollback( $Next );
584 2183         8894 return 'PPI::Statement::Include';
585             }
586             }
587              
588             # End of file... this means it is an incomplete use
589             # line, just treat it as a normal include.
590 2         6 $self->_rollback( $Next );
591 2         23 return 'PPI::Statement::Include';
592             }
593              
594             # If our parent is a Condition, we are an Expression
595 34760 100       84649 if ( $Parent->isa('PPI::Structure::Condition') ) {
596 1220         4271 return 'PPI::Statement::Expression';
597             }
598              
599             # If our parent is a List, we are also an expression
600 33540 100       68460 if ( $Parent->isa('PPI::Structure::List') ) {
601 5190         17288 return 'PPI::Statement::Expression';
602             }
603              
604             # Switch statements use expressions, as well.
605 28350 100 100     120160 if (
606             $Parent->isa('PPI::Structure::Given')
607             or
608             $Parent->isa('PPI::Structure::When')
609             ) {
610 6         33 return 'PPI::Statement::Expression';
611             }
612              
613 28344 100       138999 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
614 348         1493 return 'PPI::Statement::Compound';
615             }
616              
617             # Beyond that, I have no idea for the moment.
618             # Just keep adding more conditions above this.
619 27996         88294 return 'PPI::Statement';
620             }
621              
622             sub _lex_statement {
623 55914     55914   76805 my ($self, $Statement) = @_;
624             # my $self = shift;
625             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
626              
627             # Handle some special statements
628 55914 100       163366 if ( $Statement->isa('PPI::Statement::End') ) {
629 8         24 return $self->_lex_end( $Statement );
630             }
631              
632             # Begin processing tokens
633 55906         64247 my $Token;
634 55906         84933 while ( ref( $Token = $self->_get_token ) ) {
635             # Delay whitespace and comment tokens
636 253245 100       481806 unless ( $Token->significant ) {
637 88603         96669 push @{$self->{delayed}}, $Token;
  88603         127835  
638             # $self->_delay_element( $Token );
639 88603         133284 next;
640             }
641              
642             # Structual closes, and __DATA__ and __END__ tags implicitly
643             # end every type of statement
644 164642 100 66     285780 if (
645             $Token->__LEXER__closes
646             or
647             $Token->isa('PPI::Token::Separator')
648             ) {
649             # Rollback and end the statement
650 17640         34608 return $self->_rollback( $Token );
651             }
652              
653             # Normal statements never implicitly end
654 147002 100       328648 unless ( $Statement->__LEXER__normal ) {
655             # Have we hit an implicit end to the statement
656 24513 100       44064 unless ( $self->_continues( $Statement, $Token ) ) {
657             # Rollback and finish the statement
658 4310         9353 return $self->_rollback( $Token );
659             }
660             }
661              
662             # Any normal character just gets added
663 142692 100       302525 unless ( $Token->isa('PPI::Token::Structure') ) {
664 97817         175365 $self->_add_element( $Statement, $Token );
665 97817         155367 next;
666             }
667              
668             # Handle normal statement terminators
669 44875 100       76878 if ( $Token->content eq ';' ) {
670 22676         45003 $self->_add_element( $Statement, $Token );
671 22676         32334 return 1;
672             }
673              
674             # Which leaves us with a new structure
675              
676             # Determine the class for the structure and create it
677 22199         43674 my $method = $RESOLVE{$Token->content};
678 22199         55664 my $Structure = $self->$method($Statement)->new($Token);
679              
680             # Move the lexing down into the Structure
681 22199         52754 $self->_add_delayed( $Statement );
682 22199         45263 $self->_add_element( $Statement, $Structure );
683 22199         42884 $self->_lex_structure( $Structure );
684             }
685              
686             # Was it an error in the tokenizer?
687 11280 50       18228 unless ( defined $Token ) {
688 0         0 PPI::Exception->throw;
689             }
690              
691             # No, it's just the end of the file...
692             # Roll back any insignificant tokens, they'll get added at the Document level
693 11280         16715 $self->_rollback;
694             }
695              
696             sub _lex_end {
697 8     8   18 my ($self, $Statement) = @_;
698             # my $self = shift;
699             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
700              
701             # End of the file, EVERYTHING is ours
702 8         9 my $Token;
703 8         17 while ( $Token = $self->_get_token ) {
704             # Inlined $Statement->__add_element($Token);
705             Scalar::Util::weaken(
706 15         54 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
707             );
708 15         17 push @{$Statement->{children}}, $Token;
  15         30  
709             }
710              
711             # Was it an error in the tokenizer?
712 8 50       21 unless ( defined $Token ) {
713 0         0 PPI::Exception->throw;
714             }
715              
716             # No, it's just the end of the file...
717             # Roll back any insignificant tokens, they get added at the Document level
718 8         17 $self->_rollback;
719             }
720              
721             # For many statements, it can be difficult to determine the end-point.
722             # This method takes a statement and the next significant token, and attempts
723             # to determine if the there is a statement boundary between the two, or if
724             # the statement can continue with the token.
725             sub _continues {
726 24513     24513   34530 my ($self, $Statement, $Token) = @_;
727             # my $self = shift;
728             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
729             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
730              
731             # Handle the simple block case
732             # { print 1; }
733 24513 100 100     48804 if (
734             $Statement->schildren == 1
735             and
736             $Statement->schild(0)->isa('PPI::Structure::Block')
737             ) {
738 49         192 return '';
739             }
740              
741             # Alrighty then, there are six implied-end statement types:
742             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
743             # and ::Package statements.
744 24464 50       48975 return 1
745             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
746              
747             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
748             # simple rule and can be handled first. The block form of ::Package
749             # follows the rule, too. (The non-block form of ::Package
750             # requires a statement terminator, and thus doesn't need to have
751             # an implied end detected.)
752 24464         51889 my @part = $Statement->schildren;
753 24464         32538 my $LastChild = $part[-1];
754             # If the last significant element of the statement is a block,
755             # then an implied-end statement is done, no questions asked.
756 24464 100       106762 return !$LastChild->isa('PPI::Structure::Block')
757             if !$Statement->isa('PPI::Statement::Compound');
758              
759             # Now we get to compound statements, which kind of suck (to lex).
760             # However, of them all, the 'if' type, which includes unless, are
761             # relatively easy to handle compared to the others.
762 5388         12486 my $type = $Statement->type;
763 5388 100       9942 if ( $type eq 'if' ) {
764             # This should be one of the following
765             # if (EXPR) BLOCK
766             # if (EXPR) BLOCK else BLOCK
767             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
768              
769             # We only implicitly end on a block
770 3377 100       8653 unless ( $LastChild->isa('PPI::Structure::Block') ) {
771             # if (EXPR) ...
772             # if (EXPR) BLOCK else ...
773             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
774 2336         5422 return 1;
775             }
776              
777             # If the token before the block is an 'else',
778             # it's over, no matter what.
779 1041         2221 my $NextLast = $Statement->schild(-2);
780 1041 50 66     6541 if (
      66        
      66        
781             $NextLast
782             and
783             $NextLast->isa('PPI::Token')
784             and
785             $NextLast->isa('PPI::Token::Word')
786             and
787             $NextLast->content eq 'else'
788             ) {
789 74         268 return '';
790             }
791              
792             # Otherwise, we continue for 'elsif' or 'else' only.
793 967 100 100     3692 if (
      100        
794             $Token->isa('PPI::Token::Word')
795             and (
796             $Token->content eq 'else'
797             or
798             $Token->content eq 'elsif'
799             )
800             ) {
801 299         959 return 1;
802             }
803              
804 668         1981 return '';
805             }
806              
807 2011 100       3740 if ( $type eq 'label' ) {
808             # We only have the label so far, could be any of
809             # LABEL while (EXPR) BLOCK
810             # LABEL while (EXPR) BLOCK continue BLOCK
811             # LABEL for (EXPR; EXPR; EXPR) BLOCK
812             # LABEL foreach VAR (LIST) BLOCK
813             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
814             # LABEL BLOCK continue BLOCK
815              
816             # Handle cases with a word after the label
817 327 100 100     1520 if (
818             $Token->isa('PPI::Token::Word')
819             and
820             $Token->content =~ /^(?:while|until|for|foreach)$/
821             ) {
822 38         104 return 1;
823             }
824              
825             # Handle labelled blocks
826 289 100 100     1167 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
827 210         724 return 1;
828             }
829              
830 79         205 return '';
831             }
832              
833             # Handle the common "after round braces" case
834 1684 100 100     6294 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
835             # LABEL while (EXPR) ...
836             # LABEL while (EXPR) ...
837             # LABEL for (EXPR; EXPR; EXPR) ...
838             # LABEL for VAR (LIST) ...
839             # LABEL foreach VAR (LIST) ...
840             # Only a block will do
841 373   33     1689 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
842             }
843              
844 1311 100       2573 if ( $type eq 'for' ) {
845             # LABEL for (EXPR; EXPR; EXPR) BLOCK
846 142 100 66     579 if (
    50          
    0          
847             $LastChild->isa('PPI::Token::Word')
848             and
849             $LastChild->content =~ /^for(?:each)?\z/
850             ) {
851             # LABEL for ...
852 129 100 66     876 if (
      100        
853             (
854             $Token->isa('PPI::Token::Structure')
855             and
856             $Token->content eq '('
857             )
858             or
859             $Token->isa('PPI::Token::QuoteLike::Words')
860             ) {
861 21         69 return 1;
862             }
863              
864 108 50       296 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
865             # LABEL for VAR QW{} ...
866             # LABEL foreach VAR QW{} ...
867             # Only a block will do
868 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
869             }
870              
871             # In this case, we can also behave like a foreach
872 108         163 $type = 'foreach';
873              
874             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
875             # LABEL for (EXPR; EXPR; EXPR) BLOCK
876             # That's it, nothing can continue
877 13         51 return '';
878              
879             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
880             # LABEL for VAR QW{} ...
881             # LABEL foreach VAR QW{} ...
882             # Only a block will do
883 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
884             }
885             }
886              
887             # Handle the common continue case
888 1277 100 100     4026 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
889             # LABEL while (EXPR) BLOCK continue ...
890             # LABEL foreach VAR (LIST) BLOCK continue ...
891             # LABEL BLOCK continue ...
892             # Only a block will do
893 6   33     31 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
894             }
895              
896             # Handle the common continuable block case
897 1271 100       3375 if ( $LastChild->isa('PPI::Structure::Block') ) {
898             # LABEL while (EXPR) BLOCK
899             # LABEL while (EXPR) BLOCK ...
900             # LABEL for (EXPR; EXPR; EXPR) BLOCK
901             # LABEL foreach VAR (LIST) BLOCK
902             # LABEL foreach VAR (LIST) BLOCK ...
903             # LABEL BLOCK ...
904             # Is this the block for a continue?
905 420 100 66     2718 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
906             # LABEL while (EXPR) BLOCK continue BLOCK
907             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
908             # LABEL BLOCK continue BLOCK
909             # That's it, nothing can continue this
910 6         23 return '';
911             }
912              
913             # Only a continue will do
914 414   100     2207 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
915             }
916              
917 851 50       1562 if ( $type eq 'block' ) {
918             # LABEL BLOCK continue BLOCK
919             # Every possible case is covered in the common cases above
920             }
921              
922 851 100       1554 if ( $type eq 'while' ) {
923             # LABEL while (EXPR) BLOCK
924             # LABEL while (EXPR) BLOCK continue BLOCK
925             # LABEL until (EXPR) BLOCK
926             # LABEL until (EXPR) BLOCK continue BLOCK
927             # The only case not covered is the while ...
928 149 50 66     715 if (
      66        
929             $LastChild->isa('PPI::Token::Word')
930             and (
931             $LastChild->content eq 'while'
932             or
933             $LastChild->content eq 'until'
934             )
935             ) {
936             # LABEL while ...
937             # LABEL until ...
938             # Only a condition structure will do
939 149   33     641 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
940             }
941             }
942              
943 702 50       1255 if ( $type eq 'foreach' ) {
944             # LABEL foreach VAR (LIST) BLOCK
945             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
946             # The only two cases that have not been covered already are
947             # 'foreach ...' and 'foreach VAR ...'
948              
949 702 100       1798 if ( $LastChild->isa('PPI::Token::Symbol') ) {
950             # LABEL foreach my $scalar ...
951             # Open round brace, or a quotewords
952 208 100 66     1038 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
953 16 50       85 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
954 0         0 return '';
955             }
956              
957 494 100 100     984 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
958             # There are three possibilities here
959 279 100 100     1114 if (
    100 100        
    100 66        
    100          
960             $Token->isa('PPI::Token::Word')
961             and (
962             ($STATEMENT_CLASSES{ $Token->content } || '')
963             eq
964             'PPI::Statement::Variable'
965             )
966             ) {
967             # VAR == 'my ...'
968 194         722 return 1;
969             } elsif ( $Token->content =~ /^\$/ ) {
970             # VAR == '$scalar'
971 34         103 return 1;
972             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
973 42         150 return 1;
974             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
975 6         23 return 1;
976             } else {
977 3         9 return '';
978             }
979             }
980              
981 215 100 100     537 if (
982             ($STATEMENT_CLASSES{ $LastChild->content } || '')
983             eq
984             'PPI::Statement::Variable'
985             ) {
986             # LABEL foreach my ...
987             # Only a scalar will do
988 190         450 return $Token->content =~ /^\$/;
989             }
990              
991             # Handle the rare for my $foo qw{bar} ... case
992 25 50       76 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
993             # LABEL for VAR QW ...
994             # LABEL foreach VAR QW ...
995             # Only a block will do
996 25   33     107 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
997             }
998             }
999              
1000             # Something we don't know about... what could it be
1001 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1002             }
1003              
1004              
1005              
1006              
1007              
1008             #####################################################################
1009             # Lex Methods - Structure Object
1010              
1011             # Given a parent element, and a ( token to open a structure, determine
1012             # the class that the structure should be.
1013             sub _round {
1014 8062     8062   12406 my ($self, $Parent) = @_;
1015             # my $self = shift;
1016             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1017              
1018             # Get the last significant element in the parent
1019 8062         17955 my $Element = $Parent->schild(-1);
1020 8062 100       39545 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1021             # Can it be determined because it is a keyword?
1022 6479         15319 my $rclass = $ROUND{$Element->content};
1023 6479 100       15103 return $rclass if $rclass;
1024             }
1025              
1026             # If we are part of a for or foreach statement, we are a ForLoop
1027 6751 100       38111 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
1028 192 50       501 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1029 192         840 return 'PPI::Structure::For';
1030             }
1031             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1032 3         24 return 'PPI::Structure::Given';
1033             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1034 3         30 return 'PPI::Structure::When';
1035             }
1036              
1037             # Otherwise, it must be a list
1038              
1039             # If the previous element is -> then we mark it as a dereference
1040 6553 100 100     27426 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1041 6         18 $Element->{_dereference} = 1;
1042             }
1043              
1044             'PPI::Structure::List'
1045 6553         19346 }
1046              
1047             # Given a parent element, and a [ token to open a structure, determine
1048             # the class that the structure should be.
1049             sub _square {
1050 3014     3014   5737 my ($self, $Parent) = @_;
1051             # my $self = shift;
1052             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1053              
1054             # Get the last significant element in the parent
1055 3014         6675 my $Element = $Parent->schild(-1);
1056              
1057             # Is this a subscript, like $foo[1] or $foo{expr}
1058            
1059 3014 100       7205 if ( $Element ) {
1060 2769 100 100     9239 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1061             # $foo->[]
1062 399         841 $Element->{_dereference} = 1;
1063 399         1273 return 'PPI::Structure::Subscript';
1064             }
1065 2370 100       6945 if ( $Element->isa('PPI::Structure::Subscript') ) {
1066             # $foo{}[]
1067 22         62 return 'PPI::Structure::Subscript';
1068             }
1069 2348 100 100     7053 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1070             # $foo[], @foo[]
1071 745         2422 return 'PPI::Structure::Subscript';
1072             }
1073 1603 100 100     5266 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1074 48         103 my $prior = $Parent->schild(-2);
1075 48 100 100     225 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1076             # Postfix dereference: ->@[...] ->%[...]
1077 2         7 return 'PPI::Structure::Subscript';
1078             }
1079             }
1080             # FIXME - More cases to catch
1081             }
1082              
1083             # Otherwise, we assume that it's an anonymous arrayref constructor
1084 1846         5235 'PPI::Structure::Constructor';
1085             }
1086              
1087             # Keyword -> Structure class maps
1088             my %CURLY_CLASSES = (
1089             # Blocks
1090             'sub' => 'PPI::Structure::Block',
1091             'grep' => 'PPI::Structure::Block',
1092             'map' => 'PPI::Structure::Block',
1093             'sort' => 'PPI::Structure::Block',
1094             'do' => 'PPI::Structure::Block',
1095             # rely on 'continue' + block being handled elsewhere
1096             # rely on 'eval' + block being handled elsewhere
1097              
1098             # Hash constructors
1099             'scalar' => 'PPI::Structure::Constructor',
1100             '=' => 'PPI::Structure::Constructor',
1101             '||=' => 'PPI::Structure::Constructor',
1102             '&&=' => 'PPI::Structure::Constructor',
1103             '//=' => 'PPI::Structure::Constructor',
1104             '||' => 'PPI::Structure::Constructor',
1105             '&&' => 'PPI::Structure::Constructor',
1106             '//' => 'PPI::Structure::Constructor',
1107             '?' => 'PPI::Structure::Constructor',
1108             ':' => 'PPI::Structure::Constructor',
1109             ',' => 'PPI::Structure::Constructor',
1110             '=>' => 'PPI::Structure::Constructor',
1111             '+' => 'PPI::Structure::Constructor', # per perlref
1112             'return' => 'PPI::Structure::Constructor', # per perlref
1113             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1114             # perlfunc says first arg is a reference, and
1115             # bless {; ... } fails to compile.
1116             );
1117              
1118             my @CURLY_LOOKAHEAD_CLASSES = (
1119             {}, # not used
1120             {
1121             ';' => 'PPI::Structure::Block', # per perlref
1122             '}' => 'PPI::Structure::Constructor',
1123             },
1124             {
1125             '=>' => 'PPI::Structure::Constructor',
1126             },
1127             );
1128              
1129              
1130             # Given a parent element, and a { token to open a structure, determine
1131             # the class that the structure should be.
1132             sub _curly {
1133 11123     11123   17227 my ($self, $Parent) = @_;
1134             # my $self = shift;
1135             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1136              
1137             # Get the last significant element in the parent
1138 11123         21183 my $Element = $Parent->schild(-1);
1139 11123 100       32202 my $content = $Element ? $Element->content : '';
1140              
1141             # Is this a subscript, like $foo[1] or $foo{expr}
1142 11123 100       24471 if ( $Element ) {
1143 10483 100 66     24129 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1144             # $foo->{}
1145 2066         3657 $Element->{_dereference} = 1;
1146 2066         6411 return 'PPI::Structure::Subscript';
1147             }
1148 8417 100       23531 if ( $Element->isa('PPI::Structure::Subscript') ) {
1149             # $foo[]{}
1150 80         268 return 'PPI::Structure::Subscript';
1151             }
1152 8337 100 100     26304 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1153             # $foo{}, @foo{}
1154 544         1847 return 'PPI::Structure::Subscript';
1155             }
1156 7793 100 100     24708 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1157 299         1033 my $prior = $Parent->schild(-2);
1158 299 100 100     1847 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1159             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1160 3         10 return 'PPI::Structure::Subscript';
1161             }
1162             }
1163 7790 100       18570 if ( $Element->isa('PPI::Structure::Block') ) {
1164             # deference - ${$hash_ref}{foo}
1165             # or even ${burfle}{foo}
1166             # hash slice - @{$hash_ref}{'foo', 'bar'}
1167 2 50       5 if ( my $prior = $Parent->schild(-2) ) {
1168 2         4 my $prior_content = $prior->content();
1169 2 50 66     29 $prior->isa( 'PPI::Token::Cast' )
      33        
1170             and ( $prior_content eq '@' ||
1171             $prior_content eq '$' )
1172             and return 'PPI::Structure::Subscript';
1173             }
1174             }
1175              
1176             # Are we the last argument of sub?
1177             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1178 7788 100       23614 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1179              
1180             # Are we the second or third argument of package?
1181             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1182 5486 100       16271 return 'PPI::Structure::Block'
1183             if $Parent->isa('PPI::Statement::Package');
1184              
1185 4193 100       9475 if ( $CURLY_CLASSES{$content} ) {
1186             # Known type
1187 829         3022 return $CURLY_CLASSES{$content};
1188             }
1189             }
1190              
1191             # Are we in a compound statement
1192 4004 100       10372 if ( $Parent->isa('PPI::Statement::Compound') ) {
1193             # We will only encounter blocks in compound statements
1194 1857         5284 return 'PPI::Structure::Block';
1195             }
1196              
1197             # Are we the second or third argument of use
1198 2147 100       5825 if ( $Parent->isa('PPI::Statement::Include') ) {
1199 53 50 33     133 if ( $Parent->schildren == 2 ||
      66        
1200             $Parent->schildren == 3 &&
1201             $Parent->schild(2)->isa('PPI::Token::Number')
1202             ) {
1203             # This is something like use constant { ... };
1204 53         182 return 'PPI::Structure::Constructor';
1205             }
1206             }
1207              
1208             # Unless we are at the start of the statement, everything else should be a block
1209             ### FIXME This is possibly a bad choice, but will have to do for now.
1210 2094 100       6444 return 'PPI::Structure::Block' if $Element;
1211              
1212 640 100 66     2578 if (
1213             $Parent->isa('PPI::Statement')
1214             and
1215             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1216             ) {
1217 162         382 my $function = $Parent->parent->parent->schild(-2);
1218              
1219             # Special case: Are we the param of a core function
1220             # i.e. map({ $_ => 1 } @foo)
1221 162 100 100     568 return 'PPI::Structure::Block'
1222             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1223              
1224             # If not part of a block print, list-embedded curlies are most likely constructors
1225 68 100 100     330 return 'PPI::Structure::Constructor'
1226             if not $function or $function->content !~ /^(?:print|say)$/;
1227             }
1228              
1229             # We need to scan ahead.
1230 484         677 my $Next;
1231 484         608 my $position = 0;
1232 484         589 my @delayed;
1233 484         865 while ( $Next = $self->_get_token ) {
1234 1192 100       2503 unless ( $Next->significant ) {
1235 203         368 push @delayed, $Next;
1236 203         393 next;
1237             }
1238              
1239             # If we are off the end of the lookahead array,
1240 989 100       2556 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1241             # default to block.
1242 128         565 $self->_buffer( splice(@delayed), $Next );
1243 128         245 last;
1244             # If the content at this position is known
1245             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1246             {$Next->content} ) {
1247             # return the associated class.
1248 268         637 $self->_buffer( splice(@delayed), $Next );
1249 268         949 return $class;
1250             }
1251              
1252             # Delay and continue
1253 593         1122 push @delayed, $Next;
1254             }
1255              
1256             # Hit the end of the document, or bailed out, go with block
1257 216         563 $self->_buffer( splice(@delayed) );
1258 216 50       567 if ( ref $Parent eq 'PPI::Statement' ) {
1259 216         362 bless $Parent, 'PPI::Statement::Compound';
1260             }
1261 216         686 return 'PPI::Structure::Block';
1262             }
1263              
1264              
1265             sub _lex_structure {
1266 22199     22199   30239 my ($self, $Structure) = @_;
1267             # my $self = shift;
1268             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1269              
1270             # Start the processing loop
1271 22199         23860 my $Token;
1272 22199         32552 while ( ref($Token = $self->_get_token) ) {
1273             # Is this a direct type token
1274 88823 100       174648 unless ( $Token->significant ) {
1275 42122         43752 push @{$self->{delayed}}, $Token;
  42122         60578  
1276             # $self->_delay_element( $Token );
1277 42122         70194 next;
1278             }
1279              
1280             # Anything other than a Structure starts a Statement
1281 46701 100       125515 unless ( $Token->isa('PPI::Token::Structure') ) {
1282             # Because _statement may well delay and rollback itself,
1283             # we need to add the delayed tokens early
1284 25860         49799 $self->_add_delayed( $Structure );
1285              
1286             # Determine the class for the Statement and create it
1287 25860         47615 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1288              
1289             # Move the lexing down into the Statement
1290 25860         56349 $self->_add_element( $Structure, $Statement );
1291 25860         53354 $self->_lex_statement( $Statement );
1292              
1293 25860         48709 next;
1294             }
1295              
1296             # Is this the opening of another structure directly inside us?
1297 20841 100       37864 if ( $Token->__LEXER__opens ) {
1298             # Rollback the Token, and recurse into the statement
1299 454         1347 $self->_rollback( $Token );
1300 454         1214 my $Statement = PPI::Statement->new;
1301 454         1176 $self->_add_element( $Structure, $Statement );
1302 454         1117 $self->_lex_statement( $Statement );
1303 454         1167 next;
1304             }
1305              
1306             # Is this the close of a structure ( which would be an error )
1307 20387 100       38423 if ( $Token->__LEXER__closes ) {
1308             # Is this OUR closing structure
1309 20337 100       37455 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1310             # Add any delayed tokens, and the finishing token (the ugly way)
1311 19652         40195 $self->_add_delayed( $Structure );
1312 19652         30051 $Structure->{finish} = $Token;
1313             Scalar::Util::weaken(
1314 19652         71776 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1315             );
1316              
1317             # Confirm that ForLoop structures are actually so, and
1318             # aren't really a list.
1319 19652 100       53786 if ( $Structure->isa('PPI::Structure::For') ) {
1320 230 100       873 if ( 2 > scalar grep {
1321 592         1971 $_->isa('PPI::Statement')
1322             } $Structure->children ) {
1323 209         378 bless($Structure, 'PPI::Structure::List');
1324             }
1325             }
1326 19652         49435 return 1;
1327             }
1328              
1329             # Unmatched closing brace.
1330             # Either they typed the wrong thing, or haven't put
1331             # one at all. Either way it's an error we need to
1332             # somehow handle gracefully. For now, we'll treat it
1333             # as implicitly ending the structure. This causes the
1334             # least damage across the various reasons why this
1335             # might have happened.
1336 685         1297 return $self->_rollback( $Token );
1337             }
1338              
1339             # It's a semi-colon on its own, just inside the block.
1340             # This is a null statement.
1341             $self->_add_element(
1342 50         266 $Structure,
1343             PPI::Statement::Null->new($Token),
1344             );
1345             }
1346              
1347             # Is this an error
1348 1862 50       3107 unless ( defined $Token ) {
1349 0         0 PPI::Exception->throw;
1350             }
1351              
1352             # No, it's just the end of file.
1353             # Add any insignificant trailing tokens.
1354 1862         3006 $self->_add_delayed( $Structure );
1355             }
1356              
1357              
1358              
1359              
1360              
1361             #####################################################################
1362             # Support Methods
1363              
1364             # Get the next token for processing, handling buffering
1365             sub _get_token {
1366 458521 100   458521   449143 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  458521         1269844  
1367             }
1368              
1369             # Old long version of the above
1370             # my $self = shift;
1371             # # First from the buffer
1372             # if ( @{$self->{buffer}} ) {
1373             # return shift @{$self->{buffer}};
1374             # }
1375             #
1376             # # Then from the Tokenizer
1377             # $self->{Tokenizer}->get_token;
1378             # }
1379              
1380             # Delay the addition of insignificant elements.
1381             # This ended up being inlined.
1382             # sub _delay_element {
1383             # my $self = shift;
1384             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1385             # push @{ $_[0]->{delayed} }, $_[1];
1386             # }
1387              
1388             # Add an Element to a Node, including any delayed Elements
1389             sub _add_element {
1390 221541     221541   291292 my ($self, $Parent, $Element) = @_;
1391             # my $self = shift;
1392             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1393             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1394              
1395             # Handle a special case, where a statement is not fully resolved
1396 221541 100 100     442944 if ( ref $Parent eq 'PPI::Statement'
1397             and my $first = $Parent->schild(0) ) {
1398 65263 50 33     173903 if ( $first->isa('PPI::Token::Label')
1399             and !(my $second = $Parent->schild(1)) ) {
1400 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1401             # It's a labelled statement
1402 0 0       0 bless $Parent, $new_class if $new_class;
1403             }
1404             }
1405              
1406             # Add first the delayed, from the front, then the passed element
1407 221541         223377 foreach my $el ( @{$self->{delayed}} ) {
  221541         336485  
1408             Scalar::Util::weaken(
1409 58099         211676 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1410             );
1411             # Inlined $Parent->__add_element($el);
1412             }
1413             Scalar::Util::weaken(
1414 221541         759631 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1415             );
1416 221541         221919 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  221541         269027  
  221541         329012  
1417              
1418             # Clear the delayed elements
1419 221541         353297 $self->{delayed} = [];
1420             }
1421              
1422             # Specifically just add any delayed tokens, if any.
1423             sub _add_delayed {
1424 114896     114896   153175 my ($self, $Parent) = @_;
1425             # my $self = shift;
1426             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1427              
1428             # Add any delayed
1429 114896         118519 foreach my $el ( @{$self->{delayed}} ) {
  114896         182837  
1430             Scalar::Util::weaken(
1431 52872         191458 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1432             );
1433             # Inlined $Parent->__add_element($el);
1434             }
1435 114896         133470 push @{$Parent->{children}}, @{$self->{delayed}};
  114896         145658  
  114896         156338  
1436              
1437             # Clear the delayed elements
1438 114896         190335 $self->{delayed} = [];
1439             }
1440              
1441             # Rollback the delayed tokens, plus any passed. Once all the tokens
1442             # have been moved back on to the buffer, the order should be.
1443             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1444             sub _rollback {
1445 52586     52586   62638 my $self = shift;
1446              
1447             # First, put any passed objects back
1448 52586 100       84375 if ( @_ ) {
1449 41298         45935 unshift @{$self->{buffer}}, splice @_;
  41298         78648  
1450             }
1451              
1452             # Then, put back anything delayed
1453 52586 100       56605 if ( @{$self->{delayed}} ) {
  52586         92367  
1454 28653         29977 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  28653         34542  
  28653         43682  
1455             }
1456              
1457 52586         74575 1;
1458             }
1459              
1460             # Partial rollback, just return a single list to the buffer
1461             sub _buffer {
1462 612     612   739 my $self = shift;
1463              
1464             # Put any passed objects back
1465 612 100       1059 if ( @_ ) {
1466 471         617 unshift @{$self->{buffer}}, splice @_;
  471         1004  
1467             }
1468              
1469 612         803 1;
1470             }
1471              
1472              
1473              
1474              
1475              
1476             #####################################################################
1477             # Error Handling
1478              
1479             # Set the error message
1480             sub _error {
1481 2     2   5 $errstr = $_[1];
1482 2         10 undef;
1483             }
1484              
1485             # Clear the error message.
1486             # Returns the object as a convenience.
1487             sub _clear {
1488 16711     16711   22407 $errstr = '';
1489 16711         23713 $_[0];
1490             }
1491              
1492             =pod
1493              
1494             =head2 errstr
1495              
1496             For any error that occurs, you can use the C, as either
1497             a static or object method, to access the error message.
1498              
1499             If no error occurs for any particular action, C will return false.
1500              
1501             =cut
1502              
1503             sub errstr {
1504 2     2 1 11 $errstr;
1505             }
1506              
1507              
1508              
1509              
1510              
1511             #####################################################################
1512             # PDOM Extensions
1513             #
1514             # This is something of a future expansion... ignore it for now :)
1515             #
1516             # use PPI::Statement::Sub ();
1517             #
1518             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1519              
1520             1;
1521              
1522             =pod
1523              
1524             =head1 TO DO
1525              
1526             - Add optional support for some of the more common source filters
1527              
1528             - Some additional checks for blessing things into various Statement
1529             and Structure subclasses.
1530              
1531             =head1 SUPPORT
1532              
1533             See the L in the main module.
1534              
1535             =head1 AUTHOR
1536              
1537             Adam Kennedy Eadamk@cpan.orgE
1538              
1539             =head1 COPYRIGHT
1540              
1541             Copyright 2001 - 2011 Adam Kennedy.
1542              
1543             This program is free software; you can redistribute
1544             it and/or modify it under the same terms as Perl itself.
1545              
1546             The full text of the license can be found in the
1547             LICENSE file included with this module.
1548              
1549             =cut