File Coverage

stolen_chunk_of_toke.c
Criterion Covered Total %
statement 122 368 33.1
branch 97 606 16.0
condition n/a
subroutine n/a
pod n/a
total 219 974 22.4


line stmt bran cond sub pod time code
1             /* stolen_chunk_of_toke.c - from perl 5.8.8 toke.c
2             *
3             * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
4             * 2000, 2001, 2002, 2003, 2004, 2005, 2006, by Larry Wall and others
5             *
6             * You may distribute under the terms of either the GNU General Public
7             * License or the Artistic License, as specified in the README file.
8             *
9             */
10              
11             /*
12             * "It all comes from here, the stench and the peril." --Frodo
13             */
14              
15             /*
16             * this is all blatantly stolen. I sincerely hopes it doesn't fuck anything
17             * up but if it does blame me (Matt S Trout), not the poor original authors
18             */
19              
20             #include "ppport.h"
21              
22             /* the following #defines are stolen from assorted headers, not toke.c (mst) */
23              
24             #define skipspace(a) S_skipspace(aTHX_ a, 0)
25             #define peekspace(a) S_skipspace(aTHX_ a, 1)
26             #define skipspace_force(a) S_skipspace(aTHX_ a, 2)
27             #define incline(a) S_incline(aTHX_ a)
28             #define filter_gets(a,b,c) S_filter_gets(aTHX_ a,b,c)
29             #define scan_str(a,b,c) S_scan_str(aTHX_ a,b,c)
30             #define scan_word(a,b,c,d,e) S_scan_word(aTHX_ a,b,c,d,e)
31             #define scan_ident(a,b,c,d,e) S_scan_ident(aTHX_ a,b,c,d,e)
32              
33             STATIC void S_incline(pTHX_ char *s);
34             STATIC char* S_skipspace(pTHX_ char *s, int incline);
35             STATIC char * S_filter_gets(pTHX_ SV *sv, PerlIO *fp, STRLEN append);
36             STATIC char* S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims);
37             STATIC char* S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp);
38              
39             #define DPTR2FPTR(t,p) ((t)PTR2nat(p)) /* data pointer to function pointer */
40             #define FPTR2DPTR(t,p) ((t)PTR2nat(p)) /* function pointer to data pointer */
41             #define PTR2nat(p) (PTRV)(p) /* pointer to integer of PTRSIZE */
42              
43             /* conditionalise these two because as of 5.9.5 we already get them from
44             the headers (mst) */
45             #ifndef Newx
46             #define Newx(v,n,t) (v = (MEM_WRAP_CHECK_(n,t) (t*)safemalloc((MEM_SIZE)((n)*sizeof(t)))))
47             #endif
48             #ifndef SvPVX_const
49             #define SvPVX_const(sv) ((const char*) (0 + SvPVX(sv)))
50             #endif
51             #ifndef MEM_WRAP_CHECK_
52             #define MEM_WRAP_CHECK_(n,t) MEM_WRAP_CHECK(n,t),
53             #endif
54              
55             #define SvPV_renew(sv,n) \
56             STMT_START { SvLEN_set(sv, n); \
57             SvPV_set((sv), (MEM_WRAP_CHECK_(n,char) \
58             (char*)saferealloc((Malloc_t)SvPVX(sv), \
59             (MEM_SIZE)((n))))); \
60             } STMT_END
61              
62             #define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
63              
64             /* On MacOS, respect nonbreaking spaces */
65             #ifdef MACOS_TRADITIONAL
66             #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\312'||(c)=='\t')
67             #else
68             #define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
69             #endif
70              
71             /*
72             * Normally, during compile time, PL_curcop == &PL_compiling is true. However,
73             * Devel::Declare makes the interpreter call back to perl during compile time,
74             * which temporarily enters runtime. Then perl space calls various functions
75             * from this file, which are designed to work during compile time. They all
76             * happen to operate on PL_curcop, not PL_compiling. That doesn't make a
77             * difference in the core, but it does for Devel::Declare, which operates at
78             * runtime, but still wants to mangle the things that are about to be compiled.
79             * That's why we define our own PL_curcop and make it point to PL_compiling
80             * here.
81             */
82             #undef PL_curcop
83             #define PL_curcop (&PL_compiling)
84              
85             #define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
86              
87             #define LEX_NORMAL 10 /* normal code (ie not within "...") */
88             #define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
89             #define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
90             #define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
91             #define LEX_INTERPSTART 6 /* expecting the start of a $var */
92              
93             /* at end of code, eg "$x" followed by: */
94             #define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
95             #define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
96              
97             #define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
98             string or after \E, $foo, etc */
99             #define LEX_INTERPCONST 2 /* NOT USED */
100             #define LEX_FORMLINE 1 /* expecting a format line */
101             #define LEX_KNOWNEXT 0 /* next token known; just return it */
102              
103             /* and these two are my own madness (mst) */
104              
105             #if PERL_REVISION == 5 && PERL_VERSION == 8 && PERL_SUBVERSION >= 8
106             #define PERL_5_8_8_PLUS
107             #endif
108              
109             #if PERL_REVISION == 5 && PERL_VERSION > 8
110             #define PERL_5_9_PLUS
111             #endif
112              
113             #if !defined(PERL_5_9_PLUS) && defined(PERL_IMPLICIT_CONTEXT)
114             /* These two are not exported from the core on Windows. With 5.9+
115             it's not an issue, because they're part of the PL_parser structure,
116             which is exported. On multiplicity/thread builds we can work
117             around the lack of export by this formulation, where we provide
118             a substitute implementation of the unexported accessor functions.
119             On single-interpreter builds we can't, because access is directly
120             via symbols that are not exported. */
121             # define Perl_Ilinestart_ptr my_Ilinestart_ptr
122             char **my_Ilinestart_ptr(pTHX) { return &(aTHX->Ilinestart); }
123             # define Perl_Isublex_info_ptr my_Isublex_info_ptr
124             static SUBLEXINFO *my_Isublex_info_ptr(pTHX) { return &(aTHX->Isublex_info); }
125             #endif
126              
127             #ifdef PERL_5_9_PLUS
128             /* 5.9+ moves a bunch of things to a PL_parser struct so we need to
129             declare the backcompat macros for things to still work (mst) */
130              
131             /* XXX temporary backwards compatibility */
132             #define PL_lex_brackets (PL_parser->lex_brackets)
133             #define PL_lex_brackstack (PL_parser->lex_brackstack)
134             #define PL_lex_casemods (PL_parser->lex_casemods)
135             #define PL_lex_casestack (PL_parser->lex_casestack)
136             #define PL_lex_defer (PL_parser->lex_defer)
137             #define PL_lex_dojoin (PL_parser->lex_dojoin)
138             #define PL_lex_expect (PL_parser->lex_expect)
139             #define PL_lex_formbrack (PL_parser->lex_formbrack)
140             #define PL_lex_inpat (PL_parser->lex_inpat)
141             #define PL_lex_inwhat (PL_parser->lex_inwhat)
142             #define PL_lex_op (PL_parser->lex_op)
143             #define PL_lex_repl (PL_parser->lex_repl)
144             #define PL_lex_starts (PL_parser->lex_starts)
145             #define PL_lex_stuff (PL_parser->lex_stuff)
146             #define PL_multi_start (PL_parser->multi_start)
147             #define PL_multi_open (PL_parser->multi_open)
148             #define PL_multi_close (PL_parser->multi_close)
149             #define PL_pending_ident (PL_parser->pending_ident)
150             #define PL_preambled (PL_parser->preambled)
151             #define PL_sublex_info (PL_parser->sublex_info)
152             #define PL_linestr (PL_parser->linestr)
153             #define PL_sublex_info (PL_parser->sublex_info)
154             #define PL_linestr (PL_parser->linestr)
155             #define PL_expect (PL_parser->expect)
156             #define PL_copline (PL_parser->copline)
157             #define PL_bufptr (PL_parser->bufptr)
158             #define PL_oldbufptr (PL_parser->oldbufptr)
159             #define PL_oldoldbufptr (PL_parser->oldoldbufptr)
160             #define PL_linestart (PL_parser->linestart)
161             #define PL_bufend (PL_parser->bufend)
162             #define PL_last_uni (PL_parser->last_uni)
163             #define PL_last_lop (PL_parser->last_lop)
164             #define PL_last_lop_op (PL_parser->last_lop_op)
165             #define PL_lex_state (PL_parser->lex_state)
166             #define PL_rsfp (PL_parser->rsfp)
167             #define PL_rsfp_filters (PL_parser->rsfp_filters)
168             #define PL_in_my (PL_parser->in_my)
169             #define PL_in_my_stash (PL_parser->in_my_stash)
170             #define PL_tokenbuf (PL_parser->tokenbuf)
171             #define PL_multi_end (PL_parser->multi_end)
172             #define PL_error_count (PL_parser->error_count)
173             #define PL_nexttoke (PL_parser->nexttoke)
174             /* these are from the non-PERL_MAD path but I don't -think- I need
175             the PERL_MAD stuff since my code isn't really populating things (mst) */
176             # ifdef PERL_MAD
177             # define PL_curforce (PL_parser->curforce)
178             # define PL_lasttoke (PL_parser->lasttoke)
179             # else
180             # define PL_nexttype (PL_parser->nexttype)
181             # define PL_nextval (PL_parser->nextval)
182             # endif
183             /* end of backcompat macros from 5.9 toke.c (mst) */
184             #endif
185              
186             /* when ccflags include -DDEBUGGING we need this for earlier 5.8 perls */
187             #ifndef SvPV_nolen_const
188             #define SvPV_nolen_const SvPV_nolen
189             #endif
190              
191             /* utf8_to_uvchr_buf() not defined in earlier perls, but less-capable
192             * substitute is available */
193              
194             #ifndef utf8_to_uvchr_buf
195             #define utf8_to_uvchr_buf(s, e, lp) ((e), utf8_to_uvchr(s, lp))
196             #endif
197              
198             #ifndef isIDFIRST_lazy_if_safe
199             # define isIDFIRST_lazy_if_safe(p,e,UTF) \
200             ((! UTF || p > e) ? isIDFIRST_lazy_if(p,UTF) : 0)
201             #endif
202             #ifndef isALNUM_lazy_if_safe
203             # define isALNUM_lazy_if_safe(p,e,UTF) \
204             ((! UTF || p > e) ? isALNUM_lazy_if(p,UTF) : 0)
205             #endif
206             #ifndef isALNUM_utf8_safe
207             # define isALNUM_utf8_safe(p,e) ((p > e) ? isALNUM_utf8(p) : 0)
208             #endif
209              
210             /* and now we're back to the toke.c stuff again (mst) */
211              
212             static const char ident_too_long[] =
213             "Identifier too long";
214             static const char c_without_g[] =
215             "Use of /c modifier is meaningless without /g";
216             static const char c_in_subst[] =
217             "Use of /c modifier is meaningless in s///";
218              
219             #ifdef USE_UTF8_SCRIPTS
220             # define UTF (!IN_BYTES)
221             #else
222             # define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
223             #endif
224              
225             /* Invoke the idxth filter function for the current rsfp. */
226             /* maxlen 0 = read one text line */
227             I32
228 0           Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
229             {
230             filter_t funcp;
231 0           SV *datasv = NULL;
232              
233 0 0         if (!PL_rsfp_filters)
234 0           return -1;
235 0 0         if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
236             /* Provide a default input filter to make life easy. */
237             /* Note that we append to the line. This is handy. */
238             DEBUG_P(PerlIO_printf(Perl_debug_log,
239             "filter_read %d: from rsfp\n", idx));
240 0 0         if (maxlen) {
241             /* Want a block */
242             int len ;
243 0           const int old_len = SvCUR(buf_sv);
244              
245             /* ensure buf_sv is large enough */
246 0 0         SvGROW(buf_sv, (STRLEN)(old_len + maxlen)) ;
    0          
247 0 0         if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len, maxlen)) <= 0){
248 0 0         if (PerlIO_error(PL_rsfp))
249 0           return -1; /* error */
250             else
251 0           return 0 ; /* end of file */
252             }
253 0           SvCUR_set(buf_sv, old_len + len) ;
254             } else {
255             /* Want a line */
256 0 0         if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
257 0 0         if (PerlIO_error(PL_rsfp))
258 0           return -1; /* error */
259             else
260 0           return 0 ; /* end of file */
261             }
262             }
263 0           return SvCUR(buf_sv);
264             }
265             /* Skip this filter slot if filter has been deleted */
266 0 0         if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
    0          
267             DEBUG_P(PerlIO_printf(Perl_debug_log,
268             "filter_read %d: skipped (filter deleted)\n",
269             idx));
270 0           return FILTER_READ(idx+1, buf_sv, maxlen); /* recurse */
271             }
272             /* Get function pointer hidden within datasv */
273 0           funcp = DPTR2FPTR(filter_t, IoANY(datasv));
274             DEBUG_P(PerlIO_printf(Perl_debug_log,
275             "filter_read %d: via function %p (%s)\n",
276             idx, datasv, SvPV_nolen_const(datasv)));
277             /* Call function. The function is expected to */
278             /* call "FILTER_READ(idx+1, buf_sv)" first. */
279             /* Return: <0:error, =0:eof, >0:not eof */
280 0           return (*funcp)(aTHX_ idx, buf_sv, maxlen);
281             }
282              
283             STATIC char *
284 32           S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
285             {
286             #ifdef PERL_CR_FILTER
287             if (!PL_rsfp_filters) {
288             filter_add(S_cr_textfilter,NULL);
289             }
290             #endif
291 32 50         if (PL_rsfp_filters) {
292 32 100         if (!append)
293 20           SvCUR_set(sv, 0); /* start with empty line */
294 32 50         if (FILTER_READ(0, sv, 0) > 0)
295 32           return ( SvPVX(sv) ) ;
296             else
297 0           return Nullch ;
298             }
299             else
300 0           return (sv_gets(sv, fp, append));
301             }
302              
303             /*
304             * S_skipspace
305             * Called to gobble the appropriate amount and type of whitespace.
306             * Skips comments as well.
307             */
308              
309             STATIC char *
310 506           S_skipspace(pTHX_ register char *s, int incline)
311             {
312 506 50         if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
    0          
313 0 0         while (s < PL_bufend && SPACE_OR_TAB(*s))
    0          
    0          
314 0           s++;
315 0           return s;
316             }
317             for (;;) {
318             STRLEN prevlen;
319             SSize_t oldprevlen, oldoldprevlen;
320 518           SSize_t oldloplen = 0, oldunilen = 0;
321 946 100         while (s < PL_bufend && isSPACE(*s)) {
    100          
322 428 100         if (*s++ == '\n' && ((incline == 2) || (PL_in_eval && !PL_rsfp && !incline)))
    100          
    50          
    0          
    0          
323 3           incline(s);
324             }
325              
326             /* comment */
327 518 100         if (s < PL_bufend && *s == '#') {
    100          
328 47 50         while (s < PL_bufend && *s != '\n')
    100          
329 42           s++;
330 5 50         if (s < PL_bufend) {
331 5           s++;
332 5 50         if (PL_in_eval && !PL_rsfp && !incline) {
    0          
    0          
333 0           incline(s);
334 0           continue;
335             }
336             }
337             }
338              
339             /* also skip leading whitespace on the beginning of a line before deciding
340             * whether or not to recharge the linestr. --rafl
341             */
342 524 100         while (s < PL_bufend && isSPACE(*s)) {
    100          
343 6 50         if (*s++ == '\n' && PL_in_eval && !PL_rsfp && !incline)
    0          
    0          
    0          
344 0           incline(s);
345             }
346              
347             /* only continue to recharge the buffer if we're at the end
348             * of the buffer, we're not reading from a source filter, and
349             * we're in normal lexing mode
350             */
351 518 100         if (s < PL_bufend || !PL_rsfp || PL_lex_inwhat ||
    100          
    50          
    50          
352 12           PL_lex_state == LEX_FORMLINE)
353 506           return s;
354              
355             /* try to recharge the buffer */
356 12 50         if ((s = filter_gets(PL_linestr, PL_rsfp,
357             (prevlen = SvCUR(PL_linestr)))) == Nullch)
358             {
359             /* end of file. Add on the -p or -n magic */
360 0 0         if (PL_minus_p) {
361 0           sv_setpv(PL_linestr,
362             ";}continue{print or die qq(-p destination: $!\\n);}");
363 0           PL_minus_n = PL_minus_p = 0;
364             }
365 0 0         else if (PL_minus_n) {
366 0           sv_setpvn(PL_linestr, ";}", 2);
367 0           PL_minus_n = 0;
368             }
369             else
370 0           sv_setpvn(PL_linestr,";", 1);
371              
372             /* reset variables for next time we lex */
373 0           PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart
374 0           = SvPVX(PL_linestr);
375 0           PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
376 0           PL_last_lop = PL_last_uni = Nullch;
377              
378             /* In perl versions previous to p4-rawid: //depot/perl@32954 -P
379             * preprocessors were supported here. We don't support -P at all, even
380             * on perls that support it, and use the following chunk from blead
381             * perl. (rafl)
382             */
383              
384             /* Close the filehandle. Could be from
385             * STDIN, or a regular file. If we were reading code from
386             * STDIN (because the commandline held no -e or filename)
387             * then we don't close it, we reset it so the code can
388             * read from STDIN too.
389             */
390              
391 0 0         if ((PerlIO*)PL_rsfp == PerlIO_stdin())
392 0           PerlIO_clearerr(PL_rsfp);
393             else
394 0           (void)PerlIO_close(PL_rsfp);
395 0           PL_rsfp = Nullfp;
396 0           return s;
397             }
398              
399             /* not at end of file, so we only read another line */
400             /* make corresponding updates to old pointers, for yyerror() */
401 12           oldprevlen = PL_oldbufptr - PL_bufend;
402 12           oldoldprevlen = PL_oldoldbufptr - PL_bufend;
403 12 50         if (PL_last_uni)
404 0           oldunilen = PL_last_uni - PL_bufend;
405 12 50         if (PL_last_lop)
406 0           oldloplen = PL_last_lop - PL_bufend;
407 12           PL_linestart = PL_bufptr = s + prevlen;
408 12           PL_bufend = s + SvCUR(PL_linestr);
409 12           s = PL_bufptr;
410 12           PL_oldbufptr = s + oldprevlen;
411 12           PL_oldoldbufptr = s + oldoldprevlen;
412 12 50         if (PL_last_uni)
413 0           PL_last_uni = s + oldunilen;
414 12 50         if (PL_last_lop)
415 0           PL_last_lop = s + oldloplen;
416 12 100         if (!incline)
417 3           incline(s);
418              
419             /* debugger active and we're not compiling the debugger code,
420             * so store the line into the debugger's array of lines
421             */
422 12 50         if (PERLDB_LINE && PL_curstash != PL_debstash) {
    0          
423 0 0         AV *fileav = CopFILEAV(PL_curcop);
424 0 0         if (fileav) {
425 0           SV * const sv = NEWSV(85,0);
426 0           sv_upgrade(sv, SVt_PVMG);
427 0           sv_setpvn(sv,PL_bufptr,PL_bufend-PL_bufptr);
428 0           (void)SvIOK_on(sv);
429 0           SvIV_set(sv, 0);
430 0           av_store(fileav,(I32)CopLINE(PL_curcop),sv);
431             }
432             }
433 12           }
434             }
435              
436             STATIC char *
437 239           S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
438             {
439 239           register char *d = dest;
440 239           register char * const e = d + destlen - 3; /* two-character token, ending NUL */
441             for (;;) {
442 457 50         if (d >= e)
443 0           Perl_croak(aTHX_ ident_too_long);
444 457 50         if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
    50          
    0          
    50          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
445             /* The UTF-8 case must come first, otherwise things
446             * like c\N{COMBINING TILDE} would start failing, as the
447             * isALNUM case below would gobble the 'c' up.
448             */
449              
450 0           char *t = s + UTF8SKIP(s);
451 0 0         while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
    0          
    0          
    0          
    0          
452 0           t += UTF8SKIP(t);
453             }
454 0 0         if (d + (t - s) > e)
455 0           Perl_croak(aTHX_ "%s", ident_too_long);
456 0           Copy(s, d, t - s, char);
457 0           *d += t - s;
458 0           s = t;
459             }
460 457 100         else if (isALNUM(*s))
461             do {
462 1179           *d++ = *s++;
463 1179 100         } while (isWORDCHAR_A(*s) && d < e);
    50          
464 247 50         else if ( *s == '\''
    0          
    0          
    0          
    0          
    0          
465 0 0         && allow_package
466 0 0         && isIDFIRST_lazy_if_safe(s+1, PL_bufend, UTF))
    0          
    0          
    0          
    0          
    0          
    0          
    0          
467             {
468 0           *d++ = ':';
469 0           *d++ = ':';
470 0           s++;
471             }
472 247 100         else if (*s == ':' && s[1] == ':' && allow_package && s[2] != '$') {
    50          
    50          
    50          
473 8           *d++ = *s++;
474 8           *d++ = *s++;
475             }
476             else {
477 239           *d = '\0';
478 239           *slp = d - dest;
479 239           return s;
480             }
481 218           }
482             }
483              
484             /*
485             * S_incline
486             * This subroutine has nothing to do with tilting, whether at windmills
487             * or pinball tables. Its name is short for "increment line". It
488             * increments the current line number in CopLINE(PL_curcop) and checks
489             * to see whether the line starts with a comment of the form
490             * # line 500 "foo.pm"
491             * If so, it sets the current line number and file to the values in the comment.
492             */
493              
494             STATIC void
495 6           S_incline(pTHX_ char *s)
496             {
497             char *t;
498             char *n;
499             char *e;
500             char ch;
501              
502 6           CopLINE_inc(PL_curcop);
503 6 50         if (*s++ != '#')
504 6           return;
505 0 0         while (SPACE_OR_TAB(*s)) s++;
    0          
506 0 0         if (strnEQ(s, "line", 4))
507 0           s += 4;
508             else
509 0           return;
510 0 0         if (SPACE_OR_TAB(*s))
    0          
511 0           s++;
512             else
513 0           return;
514 0 0         while (SPACE_OR_TAB(*s)) s++;
    0          
515 0 0         if (!isDIGIT(*s))
516 0           return;
517 0           n = s;
518 0 0         while (isDIGIT(*s))
519 0           s++;
520 0 0         while (SPACE_OR_TAB(*s))
    0          
521 0           s++;
522 0 0         if (*s == '"' && (t = strchr(s+1, '"'))) {
    0          
523 0           s++;
524 0           e = t + 1;
525             }
526             else {
527 0 0         for (t = s; !isSPACE(*t); t++) ;
528 0           e = t;
529             }
530 0 0         while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
    0          
    0          
    0          
531 0           e++;
532 0 0         if (*e != '\n' && *e != '\0')
    0          
533 0           return; /* false alarm */
534              
535 0           ch = *t;
536 0           *t = '\0';
537 0 0         if (t - s > 0) {
538             /* this chunk was added to S_incline during 5.8.8. I don't know why but I don't
539             honestly care since I probably want to be bug-compatible anyway (mst) */
540              
541             /* ... my kingdom for a perl parser in perl ... (mst) */
542              
543             #ifdef PERL_5_8_8_PLUS
544             #ifndef USE_ITHREADS
545             const char *cf = CopFILE(PL_curcop);
546             if (cf && strlen(cf) > 7 && strnEQ(cf, "(eval ", 6)) {
547             /* must copy *{"::_<(eval N)[oldfilename:L]"}
548             * to *{"::_
549             char smallbuf[256], smallbuf2[256];
550             char *tmpbuf, *tmpbuf2;
551             GV **gvp, *gv2;
552             STRLEN tmplen = strlen(cf);
553             STRLEN tmplen2 = strlen(s);
554             if (tmplen + 3 < sizeof smallbuf)
555             tmpbuf = smallbuf;
556             else
557             Newx(tmpbuf, tmplen + 3, char);
558             if (tmplen2 + 3 < sizeof smallbuf2)
559             tmpbuf2 = smallbuf2;
560             else
561             Newx(tmpbuf2, tmplen2 + 3, char);
562             tmpbuf[0] = tmpbuf2[0] = '_';
563             tmpbuf[1] = tmpbuf2[1] = '<';
564             memcpy(tmpbuf + 2, cf, ++tmplen);
565             memcpy(tmpbuf2 + 2, s, ++tmplen2);
566             ++tmplen; ++tmplen2;
567             gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
568             if (gvp) {
569             gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
570             if (!isGV(gv2))
571             gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
572             /* adjust ${"::_
573             GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
574             GvHV(gv2) = (HV*)SvREFCNT_inc(GvHV(*gvp));
575             GvAV(gv2) = (AV*)SvREFCNT_inc(GvAV(*gvp));
576             }
577             if (tmpbuf != smallbuf) Safefree(tmpbuf);
578             if (tmpbuf2 != smallbuf2) Safefree(tmpbuf2);
579             }
580             #endif
581             #endif
582             /* second endif closes out the "are we 5.8.(8+)" conditional */
583 0           CopFILE_free(PL_curcop);
584 0           CopFILE_set(PL_curcop, s);
585             }
586 0           *t = ch;
587 0           CopLINE_set(PL_curcop, atoi(n)-1);
588             }
589              
590             /* scan_str
591             takes: start position in buffer
592             keep_quoted preserve \ on the embedded delimiter(s)
593             keep_delims preserve the delimiters around the string
594             returns: position to continue reading from buffer
595             side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
596             updates the read buffer.
597              
598             This subroutine pulls a string out of the input. It is called for:
599             q single quotes q(literal text)
600             ' single quotes 'literal text'
601             qq double quotes qq(interpolate $here please)
602             " double quotes "interpolate $here please"
603             qx backticks qx(/bin/ls -l)
604             ` backticks `/bin/ls -l`
605             qw quote words @EXPORT_OK = qw( func() $spam )
606             m// regexp match m/this/
607             s/// regexp substitute s/this/that/
608             tr/// string transliterate tr/this/that/
609             y/// string transliterate y/this/that/
610             ($*@) sub prototypes sub foo ($)
611             (stuff) sub attr parameters sub foo : attr(stuff)
612             <> readline or globs , <>, <$fh>, or <*.c>
613            
614             In most of these cases (all but <>, patterns and transliterate)
615             yylex() calls scan_str(). m// makes yylex() call scan_pat() which
616             calls scan_str(). s/// makes yylex() call scan_subst() which calls
617             scan_str(). tr/// and y/// make yylex() call scan_trans() which
618             calls scan_str().
619              
620             It skips whitespace before the string starts, and treats the first
621             character as the delimiter. If the delimiter is one of ([{< then
622             the corresponding "close" character )]}> is used as the closing
623             delimiter. It allows quoting of delimiters, and if the string has
624             balanced delimiters ([{<>}]) it allows nesting.
625              
626             On success, the SV with the resulting string is put into lex_stuff or,
627             if that is already non-NULL, into lex_repl. The second case occurs only
628             when parsing the RHS of the special constructs s/// and tr/// (y///).
629             For convenience, the terminating delimiter character is stuffed into
630             SvIVX of the SV.
631             */
632              
633             STATIC char *
634 60           S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
635             {
636             SV *sv; /* scalar value: string */
637             char *tmps; /* temp string, used for delimiter matching */
638 60           register char *s = start; /* current position in the buffer */
639             register char term; /* terminating character */
640             register char *to; /* current position in the sv's data */
641 60           I32 brackets = 1; /* bracket nesting level */
642 60           bool has_utf8 = FALSE; /* is there any utf8 content? */
643             I32 termcode; /* terminating char. code */
644             /* 5.8.7+ uses UTF8_MAXBYTES but also its utf8.h defs _MAXLEN to it so
645             I'm reasonably hopeful this won't destroy anything (mst) */
646             U8 termstr[UTF8_MAXLEN]; /* terminating string */
647             STRLEN termlen; /* length of terminating string */
648 60           char *last = NULL; /* last position for nesting bracket */
649              
650             /* skip space before the delimiter */
651 60 50         if (isSPACE(*s))
652 0           s = skipspace(s);
653              
654             /* mark where we are, in case we need to report errors */
655 60           CLINE;
656              
657             /* after skipping whitespace, the next character is the terminator */
658 60           term = *s;
659 60 50         if (!UTF) {
    50          
    0          
    50          
660 60           termcode = termstr[0] = term;
661 60           termlen = 1;
662             }
663             else {
664 0           termcode = utf8_to_uvchr_buf((U8*)s, PL_bufend, &termlen);
665 0           Copy(s, termstr, termlen, U8);
666 0 0         if (!UTF8_IS_INVARIANT(term))
667 0           has_utf8 = TRUE;
668             }
669              
670             /* mark where we are */
671 60           PL_multi_start = CopLINE(PL_curcop);
672 60           PL_multi_open = term;
673              
674             /* find corresponding closing delimiter */
675 60 50         if (term && (tmps = strchr("([{< )]}> )]}>",term)))
    50          
676 60           termcode = termstr[0] = term = tmps[5];
677              
678 60           PL_multi_close = term;
679              
680             /* create a new SV to hold the contents. 87 is leak category, I'm
681             assuming. 79 is the SV's initial length. What a random number. */
682 60           sv = NEWSV(87,79);
683 60           sv_upgrade(sv, SVt_PVIV);
684 60           SvIV_set(sv, termcode);
685 60           (void)SvPOK_only(sv); /* validate pointer */
686              
687             /* move past delimiter and try to read a complete string */
688 60 50         if (keep_delims)
689 0           sv_catpvn(sv, s, termlen);
690 80           s += termlen;
691             for (;;) {
692             if (PL_encoding && !UTF) {
693             bool cont = TRUE;
694              
695             while (cont) {
696             int offset = s - SvPVX_const(PL_linestr);
697             const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
698             &offset, (char*)termstr, termlen);
699             const char *ns = SvPVX_const(PL_linestr) + offset;
700             char *svlast = SvEND(sv) - 1;
701              
702             for (; s < ns; s++) {
703             if (*s == '\n' && !PL_rsfp)
704             CopLINE_inc(PL_curcop);
705             }
706             if (!found)
707             goto read_more_line;
708             else {
709             /* handle quoted delimiters */
710             if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
711             const char *t;
712             for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
713             t--;
714             if ((svlast-1 - t) % 2) {
715             if (!keep_quoted) {
716             *(svlast-1) = term;
717             *svlast = '\0';
718             SvCUR_set(sv, SvCUR(sv) - 1);
719             }
720             continue;
721             }
722             }
723             if (PL_multi_open == PL_multi_close) {
724             cont = FALSE;
725             }
726             else {
727             const char *t;
728             char *w;
729             if (!last)
730             last = SvPVX(sv);
731             for (t = w = last; t < svlast; w++, t++) {
732             /* At here, all closes are "was quoted" one,
733             so we don't check PL_multi_close. */
734             if (*t == '\\') {
735             if (!keep_quoted && *(t+1) == PL_multi_open)
736             t++;
737             else
738             *w++ = *t++;
739             }
740             else if (*t == PL_multi_open)
741             brackets++;
742              
743             *w = *t;
744             }
745             if (w < t) {
746             *w++ = term;
747             *w = '\0';
748             SvCUR_set(sv, w - SvPVX_const(sv));
749             }
750             last = w;
751             if (--brackets <= 0)
752             cont = FALSE;
753             }
754             }
755             }
756             if (!keep_delims) {
757             SvCUR_set(sv, SvCUR(sv) - 1);
758             *SvEND(sv) = '\0';
759             }
760             break;
761             }
762              
763             /* extend sv if need be */
764 80 50         SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
    50          
765             /* set 'to' to the next character in the sv's string */
766 80           to = SvPVX(sv)+SvCUR(sv);
767              
768             /* if open delimiter is the close delimiter read unbridle */
769 80 50         if (PL_multi_open == PL_multi_close) {
770 0 0         for (; s < PL_bufend; s++,to++) {
771             /* embedded newlines increment the current line number */
772 0 0         if (*s == '\n' && !PL_rsfp)
    0          
773 0           CopLINE_inc(PL_curcop);
774             /* handle quoted delimiters */
775 0 0         if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
    0          
    0          
776 0 0         if (!keep_quoted && s[1] == term)
    0          
777 0           s++;
778             /* any other quotes are simply copied straight through */
779             else
780 0           *to++ = *s++;
781             }
782             /* terminate when run out of buffer (the for() condition), or
783             have found the terminator */
784 0 0         else if (*s == term) {
785 0 0         if (termlen == 1)
786 0           break;
787 0 0         if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
    0          
788 0           break;
789             }
790 0 0         else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
    0          
    0          
    0          
    0          
    0          
791 0           has_utf8 = TRUE;
792 0           *to = *s;
793             }
794             }
795            
796             /* if the terminator isn't the same as the start character (e.g.,
797             matched brackets), we have to allow more in the quoting, and
798             be prepared for nested brackets.
799             */
800             else {
801             /* read until we run out of string, or we find the terminator */
802 396 100         for (; s < PL_bufend; s++,to++) {
803             /* embedded newlines increment the line count */
804 374 100         if (*s == '\n' && !PL_rsfp)
    100          
805 2           CopLINE_inc(PL_curcop);
806             /* backslashes can escape the open or closing characters */
807 374 50         if (*s == '\\' && s+1 < PL_bufend) {
    0          
808 0 0         if (!keep_quoted &&
    0          
809 0 0         ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
810 0           s++;
811             else
812 0           *to++ = *s++;
813             }
814             /* allow nested opens and closes */
815 374 100         else if (*s == PL_multi_close && --brackets <= 0)
    50          
816             break;
817 316 50         else if (*s == PL_multi_open)
818 0           brackets++;
819 316 50         else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
    50          
    0          
    0          
    0          
    0          
820 0           has_utf8 = TRUE;
821 316           *to = *s;
822             }
823             }
824             /* terminate the copied string and update the sv's end-of-string */
825 80           *to = '\0';
826 80           SvCUR_set(sv, to - SvPVX_const(sv));
827              
828             /*
829             * this next chunk reads more into the buffer if we're not done yet
830             */
831              
832 80 100         if (s < PL_bufend)
833 58           break; /* handle case where we are done yet :-) */
834              
835             #ifndef PERL_STRICT_CR
836 22 100         if (to - SvPVX_const(sv) >= 2) {
837 20 50         if ((to[-2] == '\r' && to[-1] == '\n') ||
    0          
    100          
838 2 50         (to[-2] == '\n' && to[-1] == '\r'))
839             {
840 0           to[-2] = '\n';
841 0           to--;
842 0           SvCUR_set(sv, to - SvPVX_const(sv));
843             }
844 20 50         else if (to[-1] == '\r')
845 20           to[-1] = '\n';
846             }
847 2 50         else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
    50          
848 0           to[-1] = '\n';
849             #endif
850            
851             read_more_line:
852             /* if we're out of file, or a read fails, bail and reset the current
853             line marker so we can report where the unterminated string began
854             */
855 42           if (!PL_rsfp ||
856 20           !(PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = filter_gets(PL_linestr, PL_rsfp, 0))) {
857 2           sv_free(sv);
858 2           CopLINE_set(PL_curcop, (line_t)PL_multi_start);
859 2           return Nullch;
860             }
861             /* we read a line, so increment our line counter */
862 20           CopLINE_inc(PL_curcop);
863              
864             /* update debugger info */
865 20 50         if (PERLDB_LINE && PL_curstash != PL_debstash) {
    0          
866 0 0         AV *fileav = CopFILEAV(PL_curcop);
867 0 0         if (fileav) {
868 0           SV *sv = NEWSV(88,0);
869 0           sv_upgrade(sv, SVt_PVMG);
870 0           sv_setsv(sv,PL_linestr);
871 0           (void)SvIOK_on(sv);
872 0           SvIV_set(sv, 0);
873 0           av_store(fileav, (I32)CopLINE(PL_curcop), sv);
874             }
875             }
876              
877             /* having changed the buffer, we must update PL_bufend */
878 20           PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
879 20           PL_last_lop = PL_last_uni = Nullch;
880 78           }
881              
882             /* at this point, we have successfully read the delimited string */
883              
884             if (!PL_encoding || UTF) {
885 58 50         if (keep_delims)
886 0           sv_catpvn(sv, s, termlen);
887 58           s += termlen;
888             }
889 58 50         if (has_utf8 || PL_encoding)
890 0           SvUTF8_on(sv);
891              
892 58           PL_multi_end = CopLINE(PL_curcop);
893              
894             /* if we allocated too much space, give some back */
895 58 50         if (SvCUR(sv) + 5 < SvLEN(sv)) {
896 58           SvLEN_set(sv, SvCUR(sv) + 1);
897             /* 5.8.8 uses SvPV_renew, no prior version actually has the damn thing (mst) */
898             #ifdef PERL_5_8_8_PLUS
899             SvPV_renew(sv, SvLEN(sv));
900             #else
901 58           Renew(SvPVX(sv), SvLEN(sv), char);
902             #endif
903             }
904              
905             /* decide whether this is the first or second quoted string we've read
906             for this op
907             */
908              
909 58 50         if (PL_lex_stuff)
910 0           PL_lex_repl = sv;
911             else
912 58           PL_lex_stuff = sv;
913 60           return s;
914             }
915              
916             #define XFAKEBRACK 128
917              
918             STATIC char *
919 0           S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
920             {
921             register char *d;
922             register char *e;
923 0           char *bracket = Nullch;
924 0           char funny = *s++;
925              
926 0 0         if (isSPACE(*s))
927 0           s = skipspace(s);
928 0           d = dest;
929 0           e = d + destlen - 3; /* two-character token, ending NUL */
930 0 0         if (isDIGIT(*s)) {
931 0 0         while (isDIGIT(*s)) {
932 0 0         if (d >= e)
933 0           Perl_croak(aTHX_ ident_too_long);
934 0           *d++ = *s++;
935             }
936             }
937             else {
938             for (;;) {
939 0 0         if (d >= e)
940 0           Perl_croak(aTHX_ ident_too_long);
941 0 0         if (UTF && isIDFIRST_utf8_safe((const U8*) s, (const U8*) PL_bufend)) {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
942             /* The UTF-8 case must come first, otherwise things
943             * like c\N{COMBINING TILDE} would start failing, as the
944             * isALNUM case below would gobble the 'c' up.
945             */
946              
947 0           char *t = s + UTF8SKIP(s);
948 0 0         while (isIDCONT_utf8_safe((const U8*) t, (const U8*) PL_bufend)) {
    0          
    0          
    0          
    0          
949 0           t += UTF8SKIP(t);
950             }
951 0 0         if (d + (t - s) > e)
952 0           Perl_croak(aTHX_ "%s", ident_too_long);
953 0           Copy(s, d, t - s, char);
954 0           *d += t - s;
955 0           s = t;
956             }
957 0 0         else if (isALNUM(*s))
958             do {
959 0           *d++ = *s++;
960 0 0         } while (isWORDCHAR_A(*s) && d < e);
    0          
961 0 0         else if (*s == '\'' && isIDFIRST_lazy_if_safe(s+1,send,UTF)) {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
962 0           *d++ = ':';
963 0           *d++ = ':';
964 0           s++;
965             }
966 0 0         else if (*s == ':' && s[1] == ':') {
    0          
967 0           *d++ = *s++;
968 0           *d++ = *s++;
969             }
970             else
971             break;
972 0           }
973             }
974 0           *d = '\0';
975 0           d = dest;
976 0 0         if (*d) {
977 0 0         if (PL_lex_state != LEX_NORMAL)
978 0           PL_lex_state = LEX_INTERPENDMAYBE;
979 0           return s;
980             }
981 0 0         if (*s == '$' && s[1] &&
    0          
982 0 0         ( isALNUM_lazy_if_safe(s+1,send,UTF)
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
983 0 0         || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
    0          
    0          
984             {
985 0           return s;
986             }
987 0 0         if (*s == '{') {
988 0           bracket = s;
989 0           s++;
990             } else if (ck_uni) {
991             /* we always call this with ck_uni == 0, so no need for check_uni() */
992             /* check_uni(); */
993             }
994 0 0         if (s < send)
995 0           *d = *s++;
996 0           d[1] = '\0';
997 0 0         if (*d == '^' && *s && isCONTROLVAR(*s)) {
    0          
    0          
    0          
998 0 0         *d = toCTRL(*s);
999 0           s++;
1000             }
1001 0 0         if (bracket) {
1002 0 0         if (isSPACE(s[-1])) {
1003 0 0         while (s < send) {
1004 0           const char ch = *s++;
1005 0 0         if (!SPACE_OR_TAB(ch)) {
    0          
1006 0           *d = ch;
1007 0           break;
1008             }
1009             }
1010             }
1011 0 0         if (isIDFIRST_lazy_if_safe(d,d+destlen,UTF)) {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
1012 0           d++;
1013 0 0         if (UTF) {
    0          
    0          
    0          
1014 0           e = s;
1015 0 0         while ( (( e < send
    0          
    0          
    0          
    0          
1016 0 0         && isIDFIRST_utf8_safe(e, send))
    0          
    0          
1017 0 0         || *e == ':'))
1018             {
1019 0           e += UTF8SKIP(e);
1020 0 0         while (e < send && isIDFIRST_utf8_safe(e, send))
    0          
    0          
    0          
    0          
    0          
    0          
    0          
1021 0           e += UTF8SKIP(e);
1022             }
1023 0           Copy(s, d, e - s, char);
1024 0           d += e - s;
1025 0           s = e;
1026             }
1027             else {
1028 0 0         while ((isALNUM(*s) || *s == ':') && d < e)
    0          
    0          
1029 0           *d++ = *s++;
1030 0 0         if (d >= e)
1031 0           Perl_croak(aTHX_ ident_too_long);
1032             }
1033 0           *d = '\0';
1034 0 0         while (s < send && SPACE_OR_TAB(*s)) s++;
    0          
    0          
1035 0 0         if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
    0          
    0          
1036             /* we don't want perl to guess what is meant. the keyword
1037             * parser decides that later. (rafl)
1038             */
1039             /*
1040             if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest)) {
1041             const char *brack = *s == '[' ? "[...]" : "{...}";
1042             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1043             "Ambiguous use of %c{%s%s} resolved to %c%s%s",
1044             funny, dest, brack, funny, dest, brack);
1045             }
1046             */
1047 0           bracket++;
1048 0           PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
1049 0           return s;
1050             }
1051             }
1052             /* Handle extended ${^Foo} variables
1053             * 1999-02-27 mjd-perl-patch@plover.com */
1054 0 0         else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
    0          
1055 0 0         && isALNUM(*s))
1056             {
1057 0           d++;
1058 0 0         while (isALNUM(*s) && d < e) {
    0          
1059 0           *d++ = *s++;
1060             }
1061 0 0         if (d >= e)
1062 0           Perl_croak(aTHX_ ident_too_long);
1063 0           *d = '\0';
1064             }
1065 0 0         if (*s == '}') {
1066 0           s++;
1067 0 0         if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
    0          
1068 0           PL_lex_state = LEX_INTERPEND;
1069 0           PL_expect = XREF;
1070             }
1071 0 0         if (funny == '#')
1072 0           funny = '@';
1073             /* we don't want perl to guess what is meant. the keyword
1074             * parser decides that later. (rafl)
1075             */
1076             /*
1077             if (PL_lex_state == LEX_NORMAL) {
1078             if (ckWARN(WARN_AMBIGUOUS) &&
1079             (keyword(dest, d - dest) || get_cv(dest, FALSE)))
1080             {
1081             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
1082             "Ambiguous use of %c{%s} resolved to %c%s",
1083             funny, dest, funny, dest);
1084             }
1085             }
1086             */
1087             }
1088             else {
1089 0           s = bracket; /* let the parser handle it */
1090 0           *dest = '\0';
1091             }
1092             }
1093             /* don't intuit. we really just want the string. (rafl) */
1094             /*
1095             else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
1096             PL_lex_state = LEX_INTERPEND;
1097             */
1098 0           return s;
1099             }