File Coverage

blib/lib/PPI/Lexer.pm
Criterion Covered Total %
statement 414 439 94.3
branch 246 284 86.6
condition 146 189 77.2
subroutine 28 28 100.0
pod 5 6 83.3
total 839 946 88.6


line stmt bran cond sub pod time code
1             package PPI::Lexer;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Lexer - The PPI Lexer
8              
9             =head1 SYNOPSIS
10              
11             use PPI;
12            
13             # Create a new Lexer
14             my $Lexer = PPI::Lexer->new;
15            
16             # Build a PPI::Document object from a Token stream
17             my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18             my $Document = $Lexer->lex_tokenizer($Tokenizer);
19            
20             # Build a PPI::Document object for some raw source
21             my $source = "print 'Hello World!'; kill(Humans->all);";
22             $Document = $Lexer->lex_source($source);
23            
24             # Build a PPI::Document object for a particular file name
25             $Document = $Lexer->lex_file('My/Module.pm');
26              
27             =head1 DESCRIPTION
28              
29             The is the L Lexer. In the larger scheme of things, its job is to take
30             token streams, in a variety of forms, and "lex" them into nested structures.
31              
32             Pretty much everything in this module happens behind the scenes at this
33             point. In fact, at the moment you don't really need to instantiate the lexer
34             at all, the three main methods will auto-instantiate themselves a
35             C object as needed.
36              
37             All methods do a one-shot "lex this and give me a L object".
38              
39             In fact, if you are reading this, what you B want to do is to
40             just "load a document", in which case you can do this in a much more
41             direct and concise manner with one of the following.
42              
43             use PPI;
44            
45             $Document = PPI::Document->load( $filename );
46             $Document = PPI::Document->new( $string );
47              
48             See L for more details.
49              
50             For more unusual tasks, by all means forge onwards.
51              
52             =head1 METHODS
53              
54             =cut
55              
56 63     63   368 use strict;
  63         115  
  63         1497  
57 63     63   272 use Scalar::Util ();
  63         100  
  63         981  
58 63     63   252 use Params::Util qw{_STRING _INSTANCE};
  63         101  
  63         2271  
59 63     63   277 use PPI ();
  63         104  
  63         700  
60 63     63   256 use PPI::Exception ();
  63         96  
  63         1020  
61 63     63   281 use PPI::Singletons '%_PARENT';
  63         156  
  63         266059  
62              
63             our $VERSION = '1.275';
64              
65             our $errstr = "";
66              
67             # Keyword -> Structure class maps
68             my %ROUND = (
69             # Conditions
70             'if' => 'PPI::Structure::Condition',
71             'elsif' => 'PPI::Structure::Condition',
72             'unless' => 'PPI::Structure::Condition',
73             'while' => 'PPI::Structure::Condition',
74             'until' => 'PPI::Structure::Condition',
75              
76             # For(each)
77             'for' => 'PPI::Structure::For',
78             'foreach' => 'PPI::Structure::For',
79             );
80              
81             # Opening brace to refining method
82             my %RESOLVE = (
83             '(' => '_round',
84             '[' => '_square',
85             '{' => '_curly',
86             );
87              
88             # Allows for experimental overriding of the tokenizer
89             our $X_TOKENIZER = "PPI::Tokenizer";
90 16703     16703 0 49872 sub X_TOKENIZER { $X_TOKENIZER }
91              
92              
93              
94              
95              
96             #####################################################################
97             # Constructor
98              
99             =pod
100              
101             =head2 new
102              
103             The C constructor creates a new C object. The object itself
104             is merely used to hold various buffers and state data during the lexing
105             process, and holds no significant data between -Elex_xxxxx calls.
106              
107             Returns a new C object
108              
109             =cut
110              
111             sub new {
112 16704     16704 1 28873 my $class = shift->_clear;
113 16704         58671 bless {
114             Tokenizer => undef, # Where we store the tokenizer for a run
115             buffer => [], # The input token buffer
116             delayed => [], # The "delayed insignificant tokens" buffer
117             }, $class;
118             }
119              
120              
121              
122              
123              
124             #####################################################################
125             # Main Lexing Methods
126              
127             =pod
128              
129             =head2 lex_file $filename
130              
131             The C method takes a filename as argument. It then loads the file,
132             creates a L for the content and lexes the token stream
133             produced by the tokenizer. Basically, a sort of all-in-one method for
134             getting a L object from a file name.
135              
136             Returns a L object, or C on error.
137              
138             =cut
139              
140             sub lex_file {
141 496 100   496 1 1734 my $self = ref $_[0] ? shift : shift->new;
142 496         1462 my $file = _STRING(shift);
143 496 100       1219 unless ( defined $file ) {
144 1         4 return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
145             }
146              
147             # Create the Tokenizer
148 495         840 my $Tokenizer = eval {
149 495         1149 X_TOKENIZER->new($file);
150             };
151 495 50       2319 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
152 0         0 return $self->_error( $@->message );
153             } elsif ( $@ ) {
154 0         0 return $self->_error( $errstr );
155             }
156              
157 495         1599 $self->lex_tokenizer( $Tokenizer );
158             }
159              
160             =pod
161              
162             =head2 lex_source $string
163              
164             The C method takes a normal scalar string as argument. It
165             creates a L object for the string, and then lexes the
166             resulting token stream.
167              
168             Returns a L object, or C on error.
169              
170             =cut
171              
172             sub lex_source {
173 16208 50   16208 1 241829 my $self = ref $_[0] ? shift : shift->new;
174 16208         22583 my $source = shift;
175 16208 50 33     53373 unless ( defined $source and not ref $source ) {
176 0         0 return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
177             }
178              
179             # Create the Tokenizer and hand off to the next method
180 16208         21057 my $Tokenizer = eval {
181 16208         25533 X_TOKENIZER->new(\$source);
182             };
183 16208 50       49630 if ( _INSTANCE($@, 'PPI::Exception') ) {
    50          
184 0         0 return $self->_error( $@->message );
185             } elsif ( $@ ) {
186 0         0 return $self->_error( $errstr );
187             }
188              
189 16208         30361 $self->lex_tokenizer( $Tokenizer );
190             }
191              
192             =pod
193              
194             =head2 lex_tokenizer $Tokenizer
195              
196             The C takes as argument a L object. It
197             lexes the token stream from the tokenizer into a L object.
198              
199             Returns a L object, or C on error.
200              
201             =cut
202              
203             sub lex_tokenizer {
204 16703 50   16703 1 30429 my $self = ref $_[0] ? shift : shift->new;
205 16703         69931 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
206 16703 50       32905 return $self->_error(
207             "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
208             ) unless $Tokenizer;
209              
210             # Create the empty document
211 16703         38799 my $Document = PPI::Document->new;
212              
213             # Lex the token stream into the document
214 16703         22807 $self->{Tokenizer} = $Tokenizer;
215 16703 100       18876 if ( !eval { $self->_lex_document($Document); 1 } ) {
  16703         35404  
  16702         27776  
216             # If an error occurs DESTROY the partially built document.
217 1         4 undef $Document;
218 1 50       6 if ( _INSTANCE($@, 'PPI::Exception') ) {
219 1         3 return $self->_error( $@->message );
220             } else {
221 0         0 return $self->_error( $errstr );
222             }
223             }
224              
225 16702         88792 return $Document;
226             }
227              
228              
229              
230              
231              
232             #####################################################################
233             # Lex Methods - Document Object
234              
235             sub _lex_document {
236 16703     16703   25757 my ($self, $Document) = @_;
237             # my $self = shift;
238             # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
239              
240             # Start the processing loop
241 16703         18121 my $Token;
242 16703         29573 while ( ref($Token = $self->_get_token) ) {
243             # Add insignificant tokens directly beneath us
244 52434 100       108802 unless ( $Token->significant ) {
245 20319         38227 $self->_add_element( $Document, $Token );
246 20319         30713 next;
247             }
248              
249 32115 100       60708 if ( $Token->content eq ';' ) {
250             # It's a semi-colon on its own.
251             # We call this a null statement.
252 451         1408 $self->_add_element(
253             $Document,
254             PPI::Statement::Null->new($Token),
255             );
256 451         931 next;
257             }
258              
259             # Handle anything other than a structural element
260 31664 100       61001 unless ( ref $Token eq 'PPI::Token::Structure' ) {
261             # Determine the class for the Statement, and create it
262 28631         57322 my $Statement = $self->_statement($Document, $Token)->new($Token);
263              
264             # Move the lexing down into the statement
265 28631         63516 $self->_add_delayed( $Document );
266 28631         57737 $self->_add_element( $Document, $Statement );
267 28631         54127 $self->_lex_statement( $Statement );
268              
269 28631         53526 next;
270             }
271              
272             # Is this the opening of a structure?
273 3033 100       6264 if ( $Token->__LEXER__opens ) {
274             # This should actually have a Statement instead
275 959         2271 $self->_rollback( $Token );
276 959         2535 my $Statement = PPI::Statement->new;
277 959         2063 $self->_add_element( $Document, $Statement );
278 959         2024 $self->_lex_statement( $Statement );
279 959         1877 next;
280             }
281              
282             # Is this the close of a structure.
283 2074 50       3856 if ( $Token->__LEXER__closes ) {
284             # Because we are at the top of the tree, this is an error.
285             # This means either a mis-parsing, or a mistake in the code.
286             # To handle this, we create a "Naked Close" statement
287 2074         5673 $self->_add_element( $Document,
288             PPI::Statement::UnmatchedBrace->new($Token)
289             );
290 2074         3890 next;
291             }
292              
293             # Shouldn't be able to get here
294 0         0 PPI::Exception->throw('Lexer reached an illegal state');
295             }
296              
297             # Did we leave the main loop because of a Tokenizer error?
298 16702 50       27513 unless ( defined $Token ) {
299 0 0       0 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
300 0   0     0 $errstr ||= 'Unknown Tokenizer Error';
301 0         0 PPI::Exception->throw($errstr);
302             }
303              
304             # No error, it's just the end of file.
305             # Add any insignificant trailing tokens.
306 16702         33190 $self->_add_delayed( $Document );
307              
308             # If the Tokenizer has any v6 blocks to attach, do so now.
309             # Checking once at the end is faster than adding a special
310             # case check for every statement parsed.
311 16702         24661 my $perl6 = $self->{Tokenizer}->{'perl6'};
312 16702 100       26863 if ( @$perl6 ) {
313 2         9 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
314 2         5 foreach my $include ( @$includes ) {
315 2 50       5 unless ( @$perl6 ) {
316 0         0 PPI::Exception->throw('Failed to find a perl6 section');
317             }
318 2         5 $include->{perl6} = shift @$perl6;
319             }
320             }
321              
322 16702         22171 return 1;
323             }
324              
325              
326              
327              
328              
329             #####################################################################
330             # Lex Methods - Statement Object
331              
332             # Keyword -> Statement Subclass
333             my %STATEMENT_CLASSES = (
334             # Things that affect the timing of execution
335             'BEGIN' => 'PPI::Statement::Scheduled',
336             'CHECK' => 'PPI::Statement::Scheduled',
337             'UNITCHECK' => 'PPI::Statement::Scheduled',
338             'INIT' => 'PPI::Statement::Scheduled',
339             'END' => 'PPI::Statement::Scheduled',
340              
341             # Special subroutines for which 'sub' is optional
342             'AUTOLOAD' => 'PPI::Statement::Sub',
343             'DESTROY' => 'PPI::Statement::Sub',
344              
345             # Loading and context statement
346             'package' => 'PPI::Statement::Package',
347             # 'use' => 'PPI::Statement::Include',
348             'no' => 'PPI::Statement::Include',
349             'require' => 'PPI::Statement::Include',
350              
351             # Various declarations
352             'my' => 'PPI::Statement::Variable',
353             'local' => 'PPI::Statement::Variable',
354             'our' => 'PPI::Statement::Variable',
355             'state' => 'PPI::Statement::Variable',
356             # Statements starting with 'sub' could be any one of...
357             # 'sub' => 'PPI::Statement::Sub',
358             # 'sub' => 'PPI::Statement::Scheduled',
359             # 'sub' => 'PPI::Statement',
360              
361             # Compound statement
362             'if' => 'PPI::Statement::Compound',
363             'unless' => 'PPI::Statement::Compound',
364             'for' => 'PPI::Statement::Compound',
365             'foreach' => 'PPI::Statement::Compound',
366             'while' => 'PPI::Statement::Compound',
367             'until' => 'PPI::Statement::Compound',
368              
369             # Switch statement
370             'given' => 'PPI::Statement::Given',
371             'when' => 'PPI::Statement::When',
372             'default' => 'PPI::Statement::When',
373              
374             # Various ways of breaking out of scope
375             'redo' => 'PPI::Statement::Break',
376             'next' => 'PPI::Statement::Break',
377             'last' => 'PPI::Statement::Break',
378             'return' => 'PPI::Statement::Break',
379             'goto' => 'PPI::Statement::Break',
380              
381             # Special sections of the file
382             '__DATA__' => 'PPI::Statement::Data',
383             '__END__' => 'PPI::Statement::End',
384             );
385              
386             sub _statement {
387 54347     54347   78965 my ($self, $Parent, $Token) = @_;
388             # my $self = shift;
389             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
390             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
391              
392             # Check for things like ( parent => ... )
393 54347 100 100     271546 if (
394             $Parent->isa('PPI::Structure::List')
395             or
396             $Parent->isa('PPI::Structure::Constructor')
397             ) {
398 7852 100       20751 if ( $Token->isa('PPI::Token::Word') ) {
399             # Is the next significant token a =>
400             # Read ahead to the next significant token
401 1957         2535 my $Next;
402 1957         3051 while ( $Next = $self->_get_token ) {
403 2730 100       6138 unless ( $Next->significant ) {
404 830         1022 push @{$self->{delayed}}, $Next;
  830         1442  
405             # $self->_delay_element( $Next );
406 830         1228 next;
407             }
408              
409             # Got the next token
410 1900 100 100     7053 if (
411             $Next->isa('PPI::Token::Operator')
412             and
413             $Next->content eq '=>'
414             ) {
415             # Is an ordinary expression
416 888         1781 $self->_rollback( $Next );
417 888         3180 return 'PPI::Statement::Expression';
418             } else {
419 1012         1756 last;
420             }
421             }
422              
423             # Rollback and continue
424 1069         1871 $self->_rollback( $Next );
425             }
426             }
427              
428 53459         67313 my $is_lexsub = 0;
429              
430             # Is it a token in our known classes list
431 53459         101500 my $class = $STATEMENT_CLASSES{$Token->content};
432 53459 100       88929 if ( $class ) {
433             # Is the next significant token a =>
434             # Read ahead to the next significant token
435 9597         10906 my $Next;
436 9597         15378 while ( $Next = $self->_get_token ) {
437 18828 100       37824 if ( !$Next->significant ) {
438 9278         10913 push @{$self->{delayed}}, $Next;
  9278         14397  
439 9278         14567 next;
440             }
441              
442             # Scheduled block must be followed by left curly or
443             # semicolon. Otherwise we have something else (e.g.
444             # open( CHECK, ... );
445 9550 100 66     18701 if (
      100        
446             'PPI::Statement::Scheduled' eq $class
447             and not ( $Next->isa( 'PPI::Token::Structure' )
448             and $Next->content =~ m/\A[{;]\z/ ) # }
449             ) {
450 1         11 $class = undef;
451 1         2 last;
452             }
453              
454             # Lexical subroutine
455 9549 100 100     15632 if (
      66        
456             $Token->content =~ /^(?:my|our|state)$/
457             and $Next->isa( 'PPI::Token::Word' ) and $Next->content eq 'sub'
458             ) {
459             # This should be PPI::Statement::Sub rather than PPI::Statement::Variable
460 7         15 $class = undef;
461 7         9 $is_lexsub = 1;
462 7         10 last;
463             }
464              
465             last if
466 9542 100 100     35478 !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>';
467              
468             # Got the next token
469             # Is an ordinary expression
470 21         51 $self->_rollback( $Next );
471 21         74 return 'PPI::Statement';
472             }
473              
474             # Rollback and continue
475 9576         16589 $self->_rollback( $Next );
476             }
477              
478             # Handle potential barewords for subscripts
479 53438 100       133873 if ( $Parent->isa('PPI::Structure::Subscript') ) {
480             # Fast obvious case, just an expression
481 3829 100 100     8127 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
482 3706         10566 return 'PPI::Statement::Expression';
483             }
484              
485             # This is something like "my" or "our" etc... more subtle.
486             # Check if the next token is a closing curly brace.
487             # This means we are something like $h{my}
488 123         143 my $Next;
489 123         189 while ( $Next = $self->_get_token ) {
490 119 50       251 unless ( $Next->significant ) {
491 0         0 push @{$self->{delayed}}, $Next;
  0         0  
492             # $self->_delay_element( $Next );
493 0         0 next;
494             }
495              
496             # Found the next significant token.
497             # Is it a closing curly brace?
498 119 50       191 if ( $Next->content eq '}' ) {
499 119         218 $self->_rollback( $Next );
500 119         400 return 'PPI::Statement::Expression';
501             } else {
502 0         0 $self->_rollback( $Next );
503 0         0 return $class;
504             }
505             }
506              
507             # End of file... this means it is something like $h{our
508             # which is probably going to be $h{our} ... I think
509 4         8 $self->_rollback( $Next );
510 4         14 return 'PPI::Statement::Expression';
511             }
512              
513             # If it's a token in our list, use that class
514 49609 100       95814 return $class if $class;
515              
516             # Handle the more in-depth sub detection
517 40194 100 100     88237 if ( $is_lexsub || $Token->content eq 'sub' ) {
518             # Read ahead to the next significant token
519 3299         4173 my $Next;
520 3299         5251 while ( $Next = $self->_get_token ) {
521 6539 100       13271 unless ( $Next->significant ) {
522 3264         3531 push @{$self->{delayed}}, $Next;
  3264         5279  
523             # $self->_delay_element( $Next );
524 3264         4999 next;
525             }
526              
527             # Got the next significant token
528 3275         5918 my $sclass = $STATEMENT_CLASSES{$Next->content};
529 3275 100 100     6749 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
530 28         275 $self->_rollback( $Next );
531 28         110 return 'PPI::Statement::Scheduled';
532             }
533 3247 100       7983 if ( $Next->isa('PPI::Token::Word') ) {
534 3118         6167 $self->_rollback( $Next );
535 3118         11809 return 'PPI::Statement::Sub';
536             }
537              
538             ### Comment out these two, as they would return PPI::Statement anyway
539             # if ( $content eq '{' ) {
540             # Anonymous sub at start of statement
541             # return 'PPI::Statement';
542             # }
543             #
544             # if ( $Next->isa('PPI::Token::Prototype') ) {
545             # Anonymous sub at start of statement
546             # return 'PPI::Statement';
547             # }
548              
549             # PPI::Statement is the safest fall-through
550 129         287 $self->_rollback( $Next );
551 129         474 return 'PPI::Statement';
552             }
553              
554             # End of file... PPI::Statement::Sub is the most likely
555 24         71 $self->_rollback( $Next );
556 24         130 return 'PPI::Statement::Sub';
557             }
558              
559 36895 100       60954 if ( $Token->content eq 'use' ) {
560             # Add a special case for "use v6" lines.
561 2184         2414 my $Next;
562 2184         3799 while ( $Next = $self->_get_token ) {
563 4363 100       8803 unless ( $Next->significant ) {
564 2181         2410 push @{$self->{delayed}}, $Next;
  2181         4001  
565             # $self->_delay_element( $Next );
566 2181         3701 next;
567             }
568              
569             # Found the next significant token.
570 2182 100 66     9441 if (
    100          
571             $Next->isa('PPI::Token::Operator')
572             and
573             $Next->content eq '=>'
574             ) {
575             # Is an ordinary expression
576 1         5 $self->_rollback( $Next );
577 1         5 return 'PPI::Statement';
578             # Is it a v6 use?
579             } elsif ( $Next->content eq 'v6' ) {
580 2         5 $self->_rollback( $Next );
581 2         18 return 'PPI::Statement::Include::Perl6';
582             } else {
583 2179         4707 $self->_rollback( $Next );
584 2179         8390 return 'PPI::Statement::Include';
585             }
586             }
587              
588             # End of file... this means it is an incomplete use
589             # line, just treat it as a normal include.
590 2         5 $self->_rollback( $Next );
591 2         15 return 'PPI::Statement::Include';
592             }
593              
594             # If our parent is a Condition, we are an Expression
595 34711 100       84627 if ( $Parent->isa('PPI::Structure::Condition') ) {
596 1218         3745 return 'PPI::Statement::Expression';
597             }
598              
599             # If our parent is a List, we are also an expression
600 33493 100       71283 if ( $Parent->isa('PPI::Structure::List') ) {
601 5098         16083 return 'PPI::Statement::Expression';
602             }
603              
604             # Switch statements use expressions, as well.
605 28395 100 100     114917 if (
606             $Parent->isa('PPI::Structure::Given')
607             or
608             $Parent->isa('PPI::Structure::When')
609             ) {
610 6         29 return 'PPI::Statement::Expression';
611             }
612              
613 28389 100       138795 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
614 347         1438 return 'PPI::Statement::Compound';
615             }
616              
617             # Beyond that, I have no idea for the moment.
618             # Just keep adding more conditions above this.
619 28042         86309 return 'PPI::Statement';
620             }
621              
622             sub _lex_statement {
623 55775     55775   73061 my ($self, $Statement) = @_;
624             # my $self = shift;
625             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
626              
627             # Handle some special statements
628 55775 100       160976 if ( $Statement->isa('PPI::Statement::End') ) {
629 8         26 return $self->_lex_end( $Statement );
630             }
631              
632             # Begin processing tokens
633 55767         62102 my $Token;
634 55767         82236 while ( ref( $Token = $self->_get_token ) ) {
635             # Delay whitespace and comment tokens
636 252340 100       476165 unless ( $Token->significant ) {
637 88343         95712 push @{$self->{delayed}}, $Token;
  88343         126049  
638             # $self->_delay_element( $Token );
639 88343         125295 next;
640             }
641              
642             # Structual closes, and __DATA__ and __END__ tags implicitly
643             # end every type of statement
644 163997 100 66     279761 if (
645             $Token->__LEXER__closes
646             or
647             $Token->isa('PPI::Token::Separator')
648             ) {
649             # Rollback and end the statement
650 17600         33858 return $self->_rollback( $Token );
651             }
652              
653             # Normal statements never implicitly end
654 146397 100       315561 unless ( $Statement->__LEXER__normal ) {
655             # Have we hit an implicit end to the statement
656 24490 100       42703 unless ( $self->_continues( $Statement, $Token ) ) {
657             # Rollback and finish the statement
658 4305         8913 return $self->_rollback( $Token );
659             }
660             }
661              
662             # Any normal character just gets added
663 142092 100       304698 unless ( $Token->isa('PPI::Token::Structure') ) {
664 97348         170882 $self->_add_element( $Statement, $Token );
665 97348         148810 next;
666             }
667              
668             # Handle normal statement terminators
669 44744 100       73179 if ( $Token->content eq ';' ) {
670 22619         44659 $self->_add_element( $Statement, $Token );
671 22619         33384 return 1;
672             }
673              
674             # Which leaves us with a new structure
675              
676             # Determine the class for the structure and create it
677 22125         39430 my $method = $RESOLVE{$Token->content};
678 22125         53616 my $Structure = $self->$method($Statement)->new($Token);
679              
680             # Move the lexing down into the Structure
681 22125         50208 $self->_add_delayed( $Statement );
682 22125         44020 $self->_add_element( $Statement, $Structure );
683 22125         40508 $self->_lex_structure( $Structure );
684             }
685              
686             # Was it an error in the tokenizer?
687 11243 50       18382 unless ( defined $Token ) {
688 0         0 PPI::Exception->throw;
689             }
690              
691             # No, it's just the end of the file...
692             # Roll back any insignificant tokens, they'll get added at the Document level
693 11243         17508 $self->_rollback;
694             }
695              
696             sub _lex_end {
697 8     8   16 my ($self, $Statement) = @_;
698             # my $self = shift;
699             # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
700              
701             # End of the file, EVERYTHING is ours
702 8         11 my $Token;
703 8         19 while ( $Token = $self->_get_token ) {
704             # Inlined $Statement->__add_element($Token);
705             Scalar::Util::weaken(
706 15         54 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
707             );
708 15         16 push @{$Statement->{children}}, $Token;
  15         26  
709             }
710              
711             # Was it an error in the tokenizer?
712 8 50       22 unless ( defined $Token ) {
713 0         0 PPI::Exception->throw;
714             }
715              
716             # No, it's just the end of the file...
717             # Roll back any insignificant tokens, they get added at the Document level
718 8         21 $self->_rollback;
719             }
720              
721             # For many statements, it can be difficult to determine the end-point.
722             # This method takes a statement and the next significant token, and attempts
723             # to determine if the there is a statement boundary between the two, or if
724             # the statement can continue with the token.
725             sub _continues {
726 24490     24490   34048 my ($self, $Statement, $Token) = @_;
727             # my $self = shift;
728             # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
729             # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
730              
731             # Handle the simple block case
732             # { print 1; }
733 24490 100 100     45968 if (
734             $Statement->schildren == 1
735             and
736             $Statement->schild(0)->isa('PPI::Structure::Block')
737             ) {
738 48         200 return '';
739             }
740              
741             # Alrighty then, there are six implied-end statement types:
742             # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When,
743             # and ::Package statements.
744 24442 50       48077 return 1
745             if ref $Statement !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/;
746              
747             # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same
748             # simple rule and can be handled first. The block form of ::Package
749             # follows the rule, too. (The non-block form of ::Package
750             # requires a statement terminator, and thus doesn't need to have
751             # an implied end detected.)
752 24442         50016 my @part = $Statement->schildren;
753 24442         32631 my $LastChild = $part[-1];
754             # If the last significant element of the statement is a block,
755             # then an implied-end statement is done, no questions asked.
756 24442 100       103586 return !$LastChild->isa('PPI::Structure::Block')
757             if !$Statement->isa('PPI::Statement::Compound');
758              
759             # Now we get to compound statements, which kind of suck (to lex).
760             # However, of them all, the 'if' type, which includes unless, are
761             # relatively easy to handle compared to the others.
762 5378         11667 my $type = $Statement->type;
763 5378 100       9825 if ( $type eq 'if' ) {
764             # This should be one of the following
765             # if (EXPR) BLOCK
766             # if (EXPR) BLOCK else BLOCK
767             # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
768              
769             # We only implicitly end on a block
770 3373 100       8095 unless ( $LastChild->isa('PPI::Structure::Block') ) {
771             # if (EXPR) ...
772             # if (EXPR) BLOCK else ...
773             # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
774 2332         5159 return 1;
775             }
776              
777             # If the token before the block is an 'else',
778             # it's over, no matter what.
779 1041         2016 my $NextLast = $Statement->schild(-2);
780 1041 50 66     5901 if (
      66        
      66        
781             $NextLast
782             and
783             $NextLast->isa('PPI::Token')
784             and
785             $NextLast->isa('PPI::Token::Word')
786             and
787             $NextLast->content eq 'else'
788             ) {
789 74         253 return '';
790             }
791              
792             # Otherwise, we continue for 'elsif' or 'else' only.
793 967 100 100     3117 if (
      100        
794             $Token->isa('PPI::Token::Word')
795             and (
796             $Token->content eq 'else'
797             or
798             $Token->content eq 'elsif'
799             )
800             ) {
801 299         906 return 1;
802             }
803              
804 668         1921 return '';
805             }
806              
807 2005 100       3594 if ( $type eq 'label' ) {
808             # We only have the label so far, could be any of
809             # LABEL while (EXPR) BLOCK
810             # LABEL while (EXPR) BLOCK continue BLOCK
811             # LABEL for (EXPR; EXPR; EXPR) BLOCK
812             # LABEL foreach VAR (LIST) BLOCK
813             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
814             # LABEL BLOCK continue BLOCK
815              
816             # Handle cases with a word after the label
817 326 100 100     1392 if (
818             $Token->isa('PPI::Token::Word')
819             and
820             $Token->content =~ /^(?:while|until|for|foreach)$/
821             ) {
822 38         105 return 1;
823             }
824              
825             # Handle labelled blocks
826 288 100 66     1117 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
827 208         613 return 1;
828             }
829              
830 80         188 return '';
831             }
832              
833             # Handle the common "after round braces" case
834 1679 100 100     5991 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
835             # LABEL while (EXPR) ...
836             # LABEL while (EXPR) ...
837             # LABEL for (EXPR; EXPR; EXPR) ...
838             # LABEL for VAR (LIST) ...
839             # LABEL foreach VAR (LIST) ...
840             # Only a block will do
841 372   33     1467 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
842             }
843              
844 1307 100       2248 if ( $type eq 'for' ) {
845             # LABEL for (EXPR; EXPR; EXPR) BLOCK
846 141 100 66     512 if (
    50          
    0          
847             $LastChild->isa('PPI::Token::Word')
848             and
849             $LastChild->content =~ /^for(?:each)?\z/
850             ) {
851             # LABEL for ...
852 128 100 66     820 if (
      100        
853             (
854             $Token->isa('PPI::Token::Structure')
855             and
856             $Token->content eq '('
857             )
858             or
859             $Token->isa('PPI::Token::QuoteLike::Words')
860             ) {
861 21         63 return 1;
862             }
863              
864 107 50       302 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
865             # LABEL for VAR QW{} ...
866             # LABEL foreach VAR QW{} ...
867             # Only a block will do
868 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
869             }
870              
871             # In this case, we can also behave like a foreach
872 107         155 $type = 'foreach';
873              
874             } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
875             # LABEL for (EXPR; EXPR; EXPR) BLOCK
876             # That's it, nothing can continue
877 13         42 return '';
878              
879             } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
880             # LABEL for VAR QW{} ...
881             # LABEL foreach VAR QW{} ...
882             # Only a block will do
883 0   0     0 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
884             }
885             }
886              
887             # Handle the common continue case
888 1273 100 100     3945 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
889             # LABEL while (EXPR) BLOCK continue ...
890             # LABEL foreach VAR (LIST) BLOCK continue ...
891             # LABEL BLOCK continue ...
892             # Only a block will do
893 6   33     36 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
894             }
895              
896             # Handle the common continuable block case
897 1267 100       3390 if ( $LastChild->isa('PPI::Structure::Block') ) {
898             # LABEL while (EXPR) BLOCK
899             # LABEL while (EXPR) BLOCK ...
900             # LABEL for (EXPR; EXPR; EXPR) BLOCK
901             # LABEL foreach VAR (LIST) BLOCK
902             # LABEL foreach VAR (LIST) BLOCK ...
903             # LABEL BLOCK ...
904             # Is this the block for a continue?
905 419 100 66     2433 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
906             # LABEL while (EXPR) BLOCK continue BLOCK
907             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
908             # LABEL BLOCK continue BLOCK
909             # That's it, nothing can continue this
910 6         35 return '';
911             }
912              
913             # Only a continue will do
914 413   100     1928 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
915             }
916              
917 848 50       1393 if ( $type eq 'block' ) {
918             # LABEL BLOCK continue BLOCK
919             # Every possible case is covered in the common cases above
920             }
921              
922 848 100       1371 if ( $type eq 'while' ) {
923             # LABEL while (EXPR) BLOCK
924             # LABEL while (EXPR) BLOCK continue BLOCK
925             # LABEL until (EXPR) BLOCK
926             # LABEL until (EXPR) BLOCK continue BLOCK
927             # The only case not covered is the while ...
928 149 50 66     584 if (
      66        
929             $LastChild->isa('PPI::Token::Word')
930             and (
931             $LastChild->content eq 'while'
932             or
933             $LastChild->content eq 'until'
934             )
935             ) {
936             # LABEL while ...
937             # LABEL until ...
938             # Only a condition structure will do
939 149   33     550 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
940             }
941             }
942              
943 699 50       1245 if ( $type eq 'foreach' ) {
944             # LABEL foreach VAR (LIST) BLOCK
945             # LABEL foreach VAR (LIST) BLOCK continue BLOCK
946             # The only two cases that have not been covered already are
947             # 'foreach ...' and 'foreach VAR ...'
948              
949 699 100       1718 if ( $LastChild->isa('PPI::Token::Symbol') ) {
950             # LABEL foreach my $scalar ...
951             # Open round brace, or a quotewords
952 207 100 66     877 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
953 16 50       62 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
954 0         0 return '';
955             }
956              
957 492 100 100     873 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
958             # There are three possibilities here
959 278 100 100     1154 if (
    100 100        
    100 66        
    100          
960             $Token->isa('PPI::Token::Word')
961             and (
962             ($STATEMENT_CLASSES{ $Token->content } || '')
963             eq
964             'PPI::Statement::Variable'
965             )
966             ) {
967             # VAR == 'my ...'
968 193         530 return 1;
969             } elsif ( $Token->content =~ /^\$/ ) {
970             # VAR == '$scalar'
971 34         113 return 1;
972             } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
973 42         134 return 1;
974             } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
975 6         27 return 1;
976             } else {
977 3         9 return '';
978             }
979             }
980              
981 214 100 100     553 if (
982             ($STATEMENT_CLASSES{ $LastChild->content } || '')
983             eq
984             'PPI::Statement::Variable'
985             ) {
986             # LABEL foreach my ...
987             # Only a scalar will do
988 189         410 return $Token->content =~ /^\$/;
989             }
990              
991             # Handle the rare for my $foo qw{bar} ... case
992 25 50       79 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
993             # LABEL for VAR QW ...
994             # LABEL foreach VAR QW ...
995             # Only a block will do
996 25   33     104 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
997             }
998             }
999              
1000             # Something we don't know about... what could it be
1001 0         0 PPI::Exception->throw("Illegal state in '$type' compound statement");
1002             }
1003              
1004              
1005              
1006              
1007              
1008             #####################################################################
1009             # Lex Methods - Structure Object
1010              
1011             # Given a parent element, and a ( token to open a structure, determine
1012             # the class that the structure should be.
1013             sub _round {
1014 7976     7976   11886 my ($self, $Parent) = @_;
1015             # my $self = shift;
1016             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1017              
1018             # Get the last significant element in the parent
1019 7976         17123 my $Element = $Parent->schild(-1);
1020 7976 100       38720 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
1021             # Can it be determined because it is a keyword?
1022 6411         13539 my $rclass = $ROUND{$Element->content};
1023 6411 100       14475 return $rclass if $rclass;
1024             }
1025              
1026             # If we are part of a for or foreach statement, we are a ForLoop
1027 6667 100       36184 if ( $Parent->isa('PPI::Statement::Compound') ) {
    100          
    100          
1028 191 50       622 if ( $Parent->type =~ /^for(?:each)?$/ ) {
1029 191         753 return 'PPI::Structure::For';
1030             }
1031             } elsif ( $Parent->isa('PPI::Statement::Given') ) {
1032 3         24 return 'PPI::Structure::Given';
1033             } elsif ( $Parent->isa('PPI::Statement::When') ) {
1034 3         25 return 'PPI::Structure::When';
1035             }
1036              
1037             # Otherwise, it must be a list
1038              
1039             # If the previous element is -> then we mark it as a dereference
1040 6470 100 100     24746 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1041 6         16 $Element->{_dereference} = 1;
1042             }
1043              
1044             'PPI::Structure::List'
1045 6470         19204 }
1046              
1047             # Given a parent element, and a [ token to open a structure, determine
1048             # the class that the structure should be.
1049             sub _square {
1050 3029     3029   5146 my ($self, $Parent) = @_;
1051             # my $self = shift;
1052             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1053              
1054             # Get the last significant element in the parent
1055 3029         6650 my $Element = $Parent->schild(-1);
1056              
1057             # Is this a subscript, like $foo[1] or $foo{expr}
1058            
1059 3029 100       7637 if ( $Element ) {
1060 2778 100 100     8920 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1061             # $foo->[]
1062 392         728 $Element->{_dereference} = 1;
1063 392         1288 return 'PPI::Structure::Subscript';
1064             }
1065 2386 100       6554 if ( $Element->isa('PPI::Structure::Subscript') ) {
1066             # $foo{}[]
1067 21         63 return 'PPI::Structure::Subscript';
1068             }
1069 2365 100 100     7047 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1070             # $foo[], @foo[]
1071 741         2402 return 'PPI::Structure::Subscript';
1072             }
1073 1624 100 100     5228 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%)/ ) {
1074 43         83 my $prior = $Parent->schild(-2);
1075 43 100 100     200 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1076             # Postfix dereference: ->@[...] ->%[...]
1077 2         11 return 'PPI::Structure::Subscript';
1078             }
1079             }
1080             # FIXME - More cases to catch
1081             }
1082              
1083             # Otherwise, we assume that it's an anonymous arrayref constructor
1084 1873         5202 'PPI::Structure::Constructor';
1085             }
1086              
1087             # Keyword -> Structure class maps
1088             my %CURLY_CLASSES = (
1089             # Blocks
1090             'sub' => 'PPI::Structure::Block',
1091             'grep' => 'PPI::Structure::Block',
1092             'map' => 'PPI::Structure::Block',
1093             'sort' => 'PPI::Structure::Block',
1094             'do' => 'PPI::Structure::Block',
1095             # rely on 'continue' + block being handled elsewhere
1096             # rely on 'eval' + block being handled elsewhere
1097              
1098             # Hash constructors
1099             'scalar' => 'PPI::Structure::Constructor',
1100             '=' => 'PPI::Structure::Constructor',
1101             '||=' => 'PPI::Structure::Constructor',
1102             '&&=' => 'PPI::Structure::Constructor',
1103             '//=' => 'PPI::Structure::Constructor',
1104             '||' => 'PPI::Structure::Constructor',
1105             '&&' => 'PPI::Structure::Constructor',
1106             '//' => 'PPI::Structure::Constructor',
1107             '?' => 'PPI::Structure::Constructor',
1108             ':' => 'PPI::Structure::Constructor',
1109             ',' => 'PPI::Structure::Constructor',
1110             '=>' => 'PPI::Structure::Constructor',
1111             '+' => 'PPI::Structure::Constructor', # per perlref
1112             'return' => 'PPI::Structure::Constructor', # per perlref
1113             'bless' => 'PPI::Structure::Constructor', # pragmatic --
1114             # perlfunc says first arg is a reference, and
1115             # bless {; ... } fails to compile.
1116             );
1117              
1118             my @CURLY_LOOKAHEAD_CLASSES = (
1119             {}, # not used
1120             {
1121             ';' => 'PPI::Structure::Block', # per perlref
1122             '}' => 'PPI::Structure::Constructor',
1123             },
1124             {
1125             '=>' => 'PPI::Structure::Constructor',
1126             },
1127             );
1128              
1129              
1130             # Given a parent element, and a { token to open a structure, determine
1131             # the class that the structure should be.
1132             sub _curly {
1133 11120     11120   17287 my ($self, $Parent) = @_;
1134             # my $self = shift;
1135             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1136              
1137             # Get the last significant element in the parent
1138 11120         21203 my $Element = $Parent->schild(-1);
1139 11120 100       31580 my $content = $Element ? $Element->content : '';
1140              
1141             # Is this a subscript, like $foo[1] or $foo{expr}
1142 11120 100       23703 if ( $Element ) {
1143 10478 100 66     24608 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1144             # $foo->{}
1145 2066         3236 $Element->{_dereference} = 1;
1146 2066         6173 return 'PPI::Structure::Subscript';
1147             }
1148 8412 100       23509 if ( $Element->isa('PPI::Structure::Subscript') ) {
1149             # $foo[]{}
1150 79         234 return 'PPI::Structure::Subscript';
1151             }
1152 8333 100 100     24830 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1153             # $foo{}, @foo{}
1154 535         1704 return 'PPI::Structure::Subscript';
1155             }
1156 7798 100 100     23417 if ( $Element->isa('PPI::Token::Cast') and $Element->content =~ /^(?:\@|\%|\*)/ ) {
1157 300         711 my $prior = $Parent->schild(-2);
1158 300 100 100     1666 if ( $prior and $prior->isa('PPI::Token::Operator') and $prior->content eq '->' ) {
      100        
1159             # Postfix dereference: ->@{...} ->%{...} ->*{...}
1160 3         11 return 'PPI::Structure::Subscript';
1161             }
1162             }
1163 7795 100       18030 if ( $Element->isa('PPI::Structure::Block') ) {
1164             # deference - ${$hash_ref}{foo}
1165             # or even ${burfle}{foo}
1166             # hash slice - @{$hash_ref}{'foo', 'bar'}
1167 2 50       8 if ( my $prior = $Parent->schild(-2) ) {
1168 2         8 my $prior_content = $prior->content();
1169 2 50 66     28 $prior->isa( 'PPI::Token::Cast' )
      33        
1170             and ( $prior_content eq '@' ||
1171             $prior_content eq '$' )
1172             and return 'PPI::Structure::Subscript';
1173             }
1174             }
1175              
1176             # Are we the last argument of sub?
1177             # E.g.: 'sub foo {}', 'sub foo ($) {}'
1178 7793 100       22948 return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub');
1179              
1180             # Are we the second or third argument of package?
1181             # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}'
1182 5495 100       15958 return 'PPI::Structure::Block'
1183             if $Parent->isa('PPI::Statement::Package');
1184              
1185 4202 100       8418 if ( $CURLY_CLASSES{$content} ) {
1186             # Known type
1187 830         2735 return $CURLY_CLASSES{$content};
1188             }
1189             }
1190              
1191             # Are we in a compound statement
1192 4014 100       9856 if ( $Parent->isa('PPI::Statement::Compound') ) {
1193             # We will only encounter blocks in compound statements
1194 1852         5207 return 'PPI::Structure::Block';
1195             }
1196              
1197             # Are we the second or third argument of use
1198 2162 100       5656 if ( $Parent->isa('PPI::Statement::Include') ) {
1199 53 50 33     135 if ( $Parent->schildren == 2 ||
      66        
1200             $Parent->schildren == 3 &&
1201             $Parent->schild(2)->isa('PPI::Token::Number')
1202             ) {
1203             # This is something like use constant { ... };
1204 53         189 return 'PPI::Structure::Constructor';
1205             }
1206             }
1207              
1208             # Unless we are at the start of the statement, everything else should be a block
1209             ### FIXME This is possibly a bad choice, but will have to do for now.
1210 2109 100       7860 return 'PPI::Structure::Block' if $Element;
1211              
1212 642 100 66     2377 if (
1213             $Parent->isa('PPI::Statement')
1214             and
1215             _INSTANCE($Parent->parent, 'PPI::Structure::List')
1216             ) {
1217 168         386 my $function = $Parent->parent->parent->schild(-2);
1218              
1219             # Special case: Are we the param of a core function
1220             # i.e. map({ $_ => 1 } @foo)
1221 168 100 100     633 return 'PPI::Structure::Block'
1222             if $function and $function->content =~ /^(?:map|grep|sort|eval|do)$/;
1223              
1224             # If not part of a block print, list-embedded curlies are most likely constructors
1225 74 100 100     304 return 'PPI::Structure::Constructor'
1226             if not $function or $function->content !~ /^(?:print|say)$/;
1227             }
1228              
1229             # We need to scan ahead.
1230 480         755 my $Next;
1231 480         569 my $position = 0;
1232 480         551 my @delayed;
1233 480         875 while ( $Next = $self->_get_token ) {
1234 1179 100       2461 unless ( $Next->significant ) {
1235 192         313 push @delayed, $Next;
1236 192         321 next;
1237             }
1238              
1239             # If we are off the end of the lookahead array,
1240 987 100       2474 if ( ++$position >= @CURLY_LOOKAHEAD_CLASSES ) {
    100          
1241             # default to block.
1242 127         478 $self->_buffer( splice(@delayed), $Next );
1243 127         186 last;
1244             # If the content at this position is known
1245             } elsif ( my $class = $CURLY_LOOKAHEAD_CLASSES[$position]
1246             {$Next->content} ) {
1247             # return the associated class.
1248 268         618 $self->_buffer( splice(@delayed), $Next );
1249 268         949 return $class;
1250             }
1251              
1252             # Delay and continue
1253 592         1193 push @delayed, $Next;
1254             }
1255              
1256             # Hit the end of the document, or bailed out, go with block
1257 212         533 $self->_buffer( splice(@delayed) );
1258 212 50       509 if ( ref $Parent eq 'PPI::Statement' ) {
1259 212         321 bless $Parent, 'PPI::Statement::Compound';
1260             }
1261 212         765 return 'PPI::Structure::Block';
1262             }
1263              
1264              
1265             sub _lex_structure {
1266 22125     22125   30333 my ($self, $Structure) = @_;
1267             # my $self = shift;
1268             # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1269              
1270             # Start the processing loop
1271 22125         25038 my $Token;
1272 22125         31339 while ( ref($Token = $self->_get_token) ) {
1273             # Is this a direct type token
1274 88385 100       172095 unless ( $Token->significant ) {
1275 41847         43153 push @{$self->{delayed}}, $Token;
  41847         60904  
1276             # $self->_delay_element( $Token );
1277 41847         62715 next;
1278             }
1279              
1280             # Anything other than a Structure starts a Statement
1281 46538 100       121033 unless ( $Token->isa('PPI::Token::Structure') ) {
1282             # Because _statement may well delay and rollback itself,
1283             # we need to add the delayed tokens early
1284 25716         53800 $self->_add_delayed( $Structure );
1285              
1286             # Determine the class for the Statement and create it
1287 25716         45224 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1288              
1289             # Move the lexing down into the Statement
1290 25716         55182 $self->_add_element( $Structure, $Statement );
1291 25716         53682 $self->_lex_statement( $Statement );
1292              
1293 25716         45134 next;
1294             }
1295              
1296             # Is this the opening of another structure directly inside us?
1297 20822 100       37286 if ( $Token->__LEXER__opens ) {
1298             # Rollback the Token, and recurse into the statement
1299 469         1292 $self->_rollback( $Token );
1300 469         1171 my $Statement = PPI::Statement->new;
1301 469         1083 $self->_add_element( $Structure, $Statement );
1302 469         1257 $self->_lex_statement( $Statement );
1303 469         1147 next;
1304             }
1305              
1306             # Is this the close of a structure ( which would be an error )
1307 20353 100       39274 if ( $Token->__LEXER__closes ) {
1308             # Is this OUR closing structure
1309 20303 100       38966 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1310             # Add any delayed tokens, and the finishing token (the ugly way)
1311 19578         39281 $self->_add_delayed( $Structure );
1312 19578         29182 $Structure->{finish} = $Token;
1313             Scalar::Util::weaken(
1314 19578         69033 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1315             );
1316              
1317             # Confirm that ForLoop structures are actually so, and
1318             # aren't really a list.
1319 19578 100       53614 if ( $Structure->isa('PPI::Structure::For') ) {
1320 229 100       866 if ( 2 > scalar grep {
1321 589         1882 $_->isa('PPI::Statement')
1322             } $Structure->children ) {
1323 208         397 bless($Structure, 'PPI::Structure::List');
1324             }
1325             }
1326 19578         47600 return 1;
1327             }
1328              
1329             # Unmatched closing brace.
1330             # Either they typed the wrong thing, or haven't put
1331             # one at all. Either way it's an error we need to
1332             # somehow handle gracefully. For now, we'll treat it
1333             # as implicitly ending the structure. This causes the
1334             # least damage across the various reasons why this
1335             # might have happened.
1336 725         1219 return $self->_rollback( $Token );
1337             }
1338              
1339             # It's a semi-colon on its own, just inside the block.
1340             # This is a null statement.
1341             $self->_add_element(
1342 50         201 $Structure,
1343             PPI::Statement::Null->new($Token),
1344             );
1345             }
1346              
1347             # Is this an error
1348 1822 50       2821 unless ( defined $Token ) {
1349 0         0 PPI::Exception->throw;
1350             }
1351              
1352             # No, it's just the end of file.
1353             # Add any insignificant trailing tokens.
1354 1822         2992 $self->_add_delayed( $Structure );
1355             }
1356              
1357              
1358              
1359              
1360              
1361             #####################################################################
1362             # Support Methods
1363              
1364             # Get the next token for processing, handling buffering
1365             sub _get_token {
1366 456927 100   456927   448496 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
  456927         1257420  
1367             }
1368              
1369             # Old long version of the above
1370             # my $self = shift;
1371             # # First from the buffer
1372             # if ( @{$self->{buffer}} ) {
1373             # return shift @{$self->{buffer}};
1374             # }
1375             #
1376             # # Then from the Tokenizer
1377             # $self->{Tokenizer}->get_token;
1378             # }
1379              
1380             # Delay the addition of insignificant elements.
1381             # This ended up being inlined.
1382             # sub _delay_element {
1383             # my $self = shift;
1384             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1385             # push @{ $_[0]->{delayed} }, $_[1];
1386             # }
1387              
1388             # Add an Element to a Node, including any delayed Elements
1389             sub _add_element {
1390 220761     220761   283952 my ($self, $Parent, $Element) = @_;
1391             # my $self = shift;
1392             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1393             # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1394              
1395             # Handle a special case, where a statement is not fully resolved
1396 220761 100 100     443246 if ( ref $Parent eq 'PPI::Statement'
1397             and my $first = $Parent->schild(0) ) {
1398 65375 50 33     176372 if ( $first->isa('PPI::Token::Label')
1399             and !(my $second = $Parent->schild(1)) ) {
1400 0         0 my $new_class = $STATEMENT_CLASSES{$second->content};
1401             # It's a labelled statement
1402 0 0       0 bless $Parent, $new_class if $new_class;
1403             }
1404             }
1405              
1406             # Add first the delayed, from the front, then the passed element
1407 220761         224251 foreach my $el ( @{$self->{delayed}} ) {
  220761         324982  
1408             Scalar::Util::weaken(
1409 57894         213476 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1410             );
1411             # Inlined $Parent->__add_element($el);
1412             }
1413             Scalar::Util::weaken(
1414 220761         762861 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1415             );
1416 220761         211502 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
  220761         267579  
  220761         319585  
1417              
1418             # Clear the delayed elements
1419 220761         336785 $self->{delayed} = [];
1420             }
1421              
1422             # Specifically just add any delayed tokens, if any.
1423             sub _add_delayed {
1424 114574     114574   156789 my ($self, $Parent) = @_;
1425             # my $self = shift;
1426             # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1427              
1428             # Add any delayed
1429 114574         118144 foreach my $el ( @{$self->{delayed}} ) {
  114574         179084  
1430             Scalar::Util::weaken(
1431 52556         184544 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1432             );
1433             # Inlined $Parent->__add_element($el);
1434             }
1435 114574         126521 push @{$Parent->{children}}, @{$self->{delayed}};
  114574         143421  
  114574         152915  
1436              
1437             # Clear the delayed elements
1438 114574         182932 $self->{delayed} = [];
1439             }
1440              
1441             # Rollback the delayed tokens, plus any passed. Once all the tokens
1442             # have been moved back on to the buffer, the order should be.
1443             # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1444             sub _rollback {
1445 52469     52469   61542 my $self = shift;
1446              
1447             # First, put any passed objects back
1448 52469 100       83900 if ( @_ ) {
1449 41218         43025 unshift @{$self->{buffer}}, splice @_;
  41218         82250  
1450             }
1451              
1452             # Then, put back anything delayed
1453 52469 100       60038 if ( @{$self->{delayed}} ) {
  52469         90697  
1454 28616         29645 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
  28616         35820  
  28616         40994  
1455             }
1456              
1457 52469         73434 1;
1458             }
1459              
1460             # Partial rollback, just return a single list to the buffer
1461             sub _buffer {
1462 607     607   711 my $self = shift;
1463              
1464             # Put any passed objects back
1465 607 100       966 if ( @_ ) {
1466 468         584 unshift @{$self->{buffer}}, splice @_;
  468         922  
1467             }
1468              
1469 607         739 1;
1470             }
1471              
1472              
1473              
1474              
1475              
1476             #####################################################################
1477             # Error Handling
1478              
1479             # Set the error message
1480             sub _error {
1481 2     2   5 $errstr = $_[1];
1482 2         8 undef;
1483             }
1484              
1485             # Clear the error message.
1486             # Returns the object as a convenience.
1487             sub _clear {
1488 16705     16705   23330 $errstr = '';
1489 16705         24358 $_[0];
1490             }
1491              
1492             =pod
1493              
1494             =head2 errstr
1495              
1496             For any error that occurs, you can use the C, as either
1497             a static or object method, to access the error message.
1498              
1499             If no error occurs for any particular action, C will return false.
1500              
1501             =cut
1502              
1503             sub errstr {
1504 2     2 1 10 $errstr;
1505             }
1506              
1507              
1508              
1509              
1510              
1511             #####################################################################
1512             # PDOM Extensions
1513             #
1514             # This is something of a future expansion... ignore it for now :)
1515             #
1516             # use PPI::Statement::Sub ();
1517             #
1518             # sub PPI::Statement::Sub::__LEXER__normal { '' }
1519              
1520             1;
1521              
1522             =pod
1523              
1524             =head1 TO DO
1525              
1526             - Add optional support for some of the more common source filters
1527              
1528             - Some additional checks for blessing things into various Statement
1529             and Structure subclasses.
1530              
1531             =head1 SUPPORT
1532              
1533             See the L in the main module.
1534              
1535             =head1 AUTHOR
1536              
1537             Adam Kennedy Eadamk@cpan.orgE
1538              
1539             =head1 COPYRIGHT
1540              
1541             Copyright 2001 - 2011 Adam Kennedy.
1542              
1543             This program is free software; you can redistribute
1544             it and/or modify it under the same terms as Perl itself.
1545              
1546             The full text of the license can be found in the
1547             LICENSE file included with this module.
1548              
1549             =cut