File Coverage

regexec.c

Criterion	Covered	Total	%
statement	548	1709	32.0
branch	334	1604	20.8
condition			n/a
subroutine			n/a
pod			n/a
total	882	3313	26.6

line	stmt	bran	code
1			/* Extended regular expression matching and search library.
2			Copyright (C) 2002-2014 Free Software Foundation, Inc.
3			This file is part of the GNU C Library.
4			Contributed by Isamu Hasegawa .
5
6			The GNU C Library is free software; you can redistribute it and/or
7			modify it under the terms of the GNU Lesser General Public
8			License as published by the Free Software Foundation; either
9			version 2.1 of the License, or (at your option) any later version.
10
11			The GNU C Library is distributed in the hope that it will be useful,
12			but WITHOUT ANY WARRANTY; without even the implied warranty of
13			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14			Lesser General Public License for more details.
15
16			You should have received a copy of the GNU Lesser General Public
17			License along with the GNU C Library; if not, see
18			. */
19
20			static reg_errcode_t match_ctx_init (pTHX_ re_match_context_t *cache, int eflags,
21			Idx n) internal_function;
22			static void match_ctx_clean (pTHX_ re_match_context_t *mctx) internal_function;
23			static void match_ctx_free (pTHX_ re_match_context_t *cache) internal_function;
24			static reg_errcode_t match_ctx_add_entry (pTHX_ re_match_context_t *cache, Idx node,
25			Idx str_idx, Idx from, Idx to)
26			internal_function;
27			static Idx search_cur_bkref_entry (pTHX_ const re_match_context_t *mctx, Idx str_idx)
28			internal_function;
29			static reg_errcode_t match_ctx_add_subtop (pTHX_ re_match_context_t *mctx, Idx node,
30			Idx str_idx) internal_function;
31			static re_sub_match_last_t * match_ctx_add_sublast (pTHX_ re_sub_match_top_t *subtop,
32			Idx node, Idx str_idx)
33			internal_function;
34			static void sift_ctx_init (pTHX_ re_sift_context_t sctx, re_dfastate_t *sifted_sts,
35			re_dfastate_t **limited_sts, Idx last_node,
36			Idx last_str_idx)
37			internal_function;
38			static reg_errcode_t re_search_internal (pTHX_ const regex_t *preg,
39			const char string, Idx length, SV sv,
40			Idx start, Idx last_start, Idx stop,
41			size_t nmatch, regmatch_t pmatch[],
42			int eflags) internal_function;
43			static regoff_t re_search_2_stub (pTHX_ struct re_pattern_buffer *bufp,
44			const char string1, Idx length1, SV sv1,
45			const char string2, Idx length2, SV sv2,
46			Idx start, regoff_t range,
47			struct re_registers *regs,
48			Idx stop, bool ret_len) internal_function;
49			static regoff_t re_search_stub (pTHX_ struct re_pattern_buffer *bufp,
50			const char string, Idx length, SV sv, Idx start,
51			regoff_t range, Idx stop,
52			struct re_registers *regs,
53			bool ret_len) internal_function;
54			static unsigned re_copy_regs (pTHX_ struct re_registers regs, regmatch_t pmatch,
55			Idx nregs, int regs_allocated) internal_function;
56			static reg_errcode_t prune_impossible_nodes (pTHX_ re_match_context_t *mctx)
57			internal_function;
58			static Idx check_matching (pTHX_ re_match_context_t *mctx, bool fl_longest_match,
59			Idx *p_match_first) internal_function;
60			static Idx check_halt_state_context (pTHX_ const re_match_context_t *mctx,
61			const re_dfastate_t *state, Idx idx)
62			internal_function;
63			static void update_regs (pTHX_ const re_dfa_t dfa, regmatch_t pmatch,
64			regmatch_t *prev_idx_match, Idx cur_node,
65			Idx cur_idx, Idx nmatch) internal_function;
66			static reg_errcode_t push_fail_stack (pTHX_ struct re_fail_stack_t *fs,
67			Idx str_idx, Idx dest_node, Idx nregs,
68			regmatch_t *regs,
69			re_node_set *eps_via_nodes)
70			internal_function;
71			static reg_errcode_t set_regs (pTHX_ const regex_t *preg,
72			const re_match_context_t *mctx,
73			size_t nmatch, regmatch_t *pmatch,
74			bool fl_backtrack) internal_function;
75			static reg_errcode_t free_fail_stack_return (pTHX_ struct re_fail_stack_t *fs)
76			internal_function;
77
78			#ifdef RE_ENABLE_I18N
79			static int sift_states_iter_mb (pTHX_ const re_match_context_t *mctx,
80			re_sift_context_t *sctx,
81			Idx node_idx, Idx str_idx, Idx max_str_idx)
82			internal_function;
83			#endif /* RE_ENABLE_I18N */
84			static reg_errcode_t sift_states_backward (pTHX_ const re_match_context_t *mctx,
85			re_sift_context_t *sctx)
86			internal_function;
87			static reg_errcode_t build_sifted_states (pTHX_ const re_match_context_t *mctx,
88			re_sift_context_t *sctx, Idx str_idx,
89			re_node_set *cur_dest)
90			internal_function;
91			static reg_errcode_t update_cur_sifted_state (pTHX_ const re_match_context_t *mctx,
92			re_sift_context_t *sctx,
93			Idx str_idx,
94			re_node_set *dest_nodes)
95			internal_function;
96			static reg_errcode_t add_epsilon_src_nodes (pTHX_ const re_dfa_t *dfa,
97			re_node_set *dest_nodes,
98			const re_node_set *candidates)
99			internal_function;
100			static bool check_dst_limits (pTHX_ const re_match_context_t *mctx,
101			const re_node_set *limits,
102			Idx dst_node, Idx dst_idx, Idx src_node,
103			Idx src_idx) internal_function;
104			static int check_dst_limits_calc_pos_1 (pTHX_ const re_match_context_t *mctx,
105			int boundaries, Idx subexp_idx,
106			Idx from_node, Idx bkref_idx)
107			internal_function;
108			static int check_dst_limits_calc_pos (pTHX_ const re_match_context_t *mctx,
109			Idx limit, Idx subexp_idx,
110			Idx node, Idx str_idx,
111			Idx bkref_idx) internal_function;
112			static reg_errcode_t check_subexp_limits (pTHX_ const re_dfa_t *dfa,
113			re_node_set *dest_nodes,
114			const re_node_set *candidates,
115			re_node_set *limits,
116			struct re_backref_cache_entry *bkref_ents,
117			Idx str_idx) internal_function;
118			static reg_errcode_t sift_states_bkref (pTHX_ const re_match_context_t *mctx,
119			re_sift_context_t *sctx,
120			Idx str_idx, const re_node_set *candidates)
121			internal_function;
122			static reg_errcode_t merge_state_array (pTHX_ const re_dfa_t *dfa,
123			re_dfastate_t **dst,
124			re_dfastate_t **src, Idx num)
125			internal_function;
126			static re_dfastate_t find_recover_state (pTHX_ reg_errcode_t err,
127			re_match_context_t *mctx) internal_function;
128			static re_dfastate_t transit_state (pTHX_ reg_errcode_t err,
129			re_match_context_t *mctx,
130			re_dfastate_t *state) internal_function;
131			static re_dfastate_t merge_state_with_log (pTHX_ reg_errcode_t err,
132			re_match_context_t *mctx,
133			re_dfastate_t *next_state)
134			internal_function;
135			static reg_errcode_t check_subexp_matching_top (pTHX_ re_match_context_t *mctx,
136			re_node_set *cur_nodes,
137			Idx str_idx) internal_function;
138			#if 0
139			static re_dfastate_t transit_state_sb (pTHX_ reg_errcode_t err,
140			re_match_context_t *mctx,
141			re_dfastate_t *pstate)
142			internal_function;
143			#endif
144			#ifdef RE_ENABLE_I18N
145			static reg_errcode_t transit_state_mb (pTHX_ re_match_context_t *mctx,
146			re_dfastate_t *pstate)
147			internal_function;
148			#endif /* RE_ENABLE_I18N */
149			static reg_errcode_t transit_state_bkref (pTHX_ re_match_context_t *mctx,
150			const re_node_set *nodes)
151			internal_function;
152			static reg_errcode_t get_subexp (pTHX_ re_match_context_t *mctx,
153			Idx bkref_node, Idx bkref_str_idx)
154			internal_function;
155			static reg_errcode_t get_subexp_sub (pTHX_ re_match_context_t *mctx,
156			const re_sub_match_top_t *sub_top,
157			re_sub_match_last_t *sub_last,
158			Idx bkref_node, Idx bkref_str)
159			internal_function;
160			static Idx find_subexp_node (pTHX_ const re_dfa_t dfa, const re_node_set nodes,
161			Idx subexp_idx, int type) internal_function;
162			static reg_errcode_t check_arrival (pTHX_ re_match_context_t *mctx,
163			state_array_t *path, Idx top_node,
164			Idx top_str, Idx last_node, Idx last_str,
165			int type) internal_function;
166			static reg_errcode_t check_arrival_add_next_nodes (pTHX_ re_match_context_t *mctx,
167			Idx str_idx,
168			re_node_set *cur_nodes,
169			re_node_set *next_nodes)
170			internal_function;
171			static reg_errcode_t check_arrival_expand_ecl (pTHX_ const re_dfa_t *dfa,
172			re_node_set *cur_nodes,
173			Idx ex_subexp, int type)
174			internal_function;
175			static reg_errcode_t check_arrival_expand_ecl_sub (pTHX_ const re_dfa_t *dfa,
176			re_node_set *dst_nodes,
177			Idx target, Idx ex_subexp,
178			int type) internal_function;
179			static reg_errcode_t expand_bkref_cache (pTHX_ re_match_context_t *mctx,
180			re_node_set *cur_nodes, Idx cur_str,
181			Idx subexp_num, int type)
182			internal_function;
183			static bool build_trtable (pTHX_ const re_dfa_t *dfa,
184			re_dfastate_t *state) internal_function;
185			#ifdef RE_ENABLE_I18N
186			static int check_node_accept_bytes (pTHX_ const re_dfa_t *dfa, Idx node_idx,
187			const re_string_t input, SV sv, Idx idx)
188			internal_function;
189			# ifdef _LIBC
190			static unsigned int find_collation_sequence_value (pTHX_ const unsigned char *mbs,
191			size_t name_len)
192			internal_function;
193			# endif /* _LIBC */
194			#endif /* RE_ENABLE_I18N */
195			static Idx group_nodes_into_DFAstates (pTHX_ const re_dfa_t *dfa,
196			const re_dfastate_t *state,
197			re_node_set *states_node,
198			bitset_t *states_ch) internal_function;
199			static bool check_node_accept (pTHX_ const re_match_context_t *mctx,
200			const re_token_t *node, Idx idx)
201			internal_function;
202			static reg_errcode_t extend_buffers (pTHX_ re_match_context_t *mctx, Idx min_len)
203			internal_function;
204
205			/* Entry point for POSIX code. */
206
207			/* regexec searches for a given pattern, specified by PREG, in the
208			string STRING.
209
210			If NMATCH is zero or REG_NOSUB was set in the cflags argument to
211			'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
212			least NMATCH elements, and we set them to the offsets of the
213			corresponding matched substrings.
214
215			EFLAGS specifies "execution flags" which affect matching: if
216			REG_NOTBOL is set, then ^ does not match at the beginning of the
217			string; if REG_NOTEOL is set, then $ does not match at the end.
218
219			We return 0 if we find a match and REG_NOMATCH if not. */
220
221			int
222	0		regexec (pTHX_ const regex_t _Restrict_ preg, const char _Restrict_ string, SV *sv, size_t nmatch, regmatch_t pmatch[_Restrict_arr_], int eflags)
223			{
224			reg_errcode_t err;
225			Idx start, length;
226	0		re_dfa_t *dfa = preg->buffer;
227
228	0	0	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
229	0		return REG_BADPAT;
230
231	0	0	if (eflags & REG_STARTEND)
232			{
233	0		start = pmatch[0].rm_so;
234	0		length = pmatch[0].rm_eo;
235			}
236			else
237			{
238	0		start = 0;
239	0		length = strlen (string);
240			}
241
242			lock_lock (dfa->lock);
243	0	0	if (preg->no_sub)
244	0		err = re_search_internal (aTHX_ preg, string, length, sv, start, length,
245			length, 0, NULL, eflags);
246			else
247	0		err = re_search_internal (aTHX_ preg, string, length, sv, start, length,
248			length, nmatch, pmatch, eflags);
249			lock_unlock (dfa->lock);
250	0		return err != REG_NOERROR;
251			}
252
253			#ifdef _LIBC
254			# include
255			versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
256
257			# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
258			__typeof__ (__regexec) __compat_regexec;
259
260			int
261			attribute_compat_text_section
262			__compat_regexec (pTHX_ const regex_t *_Restrict_ preg,
263			const char *_Restrict_ string, size_t nmatch,
264			regmatch_t pmatch[], int eflags)
265			{
266			return regexec (preg, string, nmatch, pmatch,
267			eflags & (REG_NOTBOL \| REG_NOTEOL));
268			}
269			compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
270			# endif
271			#endif
272
273			/* Entry points for GNU code. */
274
275			/* re_match, re_search, re_match_2, re_search_2
276
277			The former two functions operate on STRING with length LENGTH,
278			while the later two operate on concatenation of STRING1 and STRING2
279			with lengths LENGTH1 and LENGTH2, respectively.
280
281			re_match() matches the compiled pattern in BUFP against the string,
282			starting at index START.
283
284			re_search() first tries matching at index START, then it tries to match
285			starting from index START + 1, and so on. The last start position tried
286			is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
287			way as re_match().)
288
289			The parameter STOP of re_{match,search}_2 specifies that no match exceeding
290			the first STOP characters of the concatenation of the strings should be
291			concerned.
292
293			If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
294			and all groups is stored in REGS. (For the "_2" variants, the offsets are
295			computed relative to the concatenation, not relative to the individual
296			strings.)
297
298			On success, re_match* functions return the length of the match, re_search*
299			return the position of the start of the match. Return value -1 means no
300			match was found and -2 indicates an internal error. */
301
302			regoff_t
303	0		re_match (pTHX_ struct re_pattern_buffer bufp, const char string, Idx length, SV sv, Idx start, struct re_registers regs)
304			{
305	0		return re_search_stub (aTHX_ bufp, string, length, sv, start, 0, length, regs, true);
306			}
307			#ifdef _LIBC
308			weak_alias (__re_match, re_match)
309			#endif
310
311			regoff_t
312	14		re_search (pTHX_ struct re_pattern_buffer bufp, const char string, Idx length, SV sv, Idx start, regoff_t range, struct re_registers regs)
313			{
314	14		return re_search_stub (aTHX_ bufp, string, length, sv, start, range, length, regs,
315			false);
316			}
317			#ifdef _LIBC
318			weak_alias (__re_search, re_search)
319			#endif
320
321			regoff_t
322	0		re_match_2 (pTHX_ struct re_pattern_buffer bufp, const char string1, Idx length1, SV sv1, const char string2, Idx length2, SV sv2, Idx start, struct re_registers regs, Idx stop)
323			{
324	0		return re_search_2_stub (aTHX_ bufp, string1, length1, sv1, string2, length2, sv2,
325			start, 0, regs, stop, true);
326			}
327			#ifdef _LIBC
328			weak_alias (__re_match_2, re_match_2)
329			#endif
330
331			regoff_t
332	0		re_search_2 (pTHX_ struct re_pattern_buffer bufp, const char string1, Idx length1, SV sv1, const char string2, Idx length2, SV sv2, Idx start, regoff_t range, struct re_registers regs, Idx stop)
333			{
334	0		return re_search_2_stub (aTHX_ bufp, string1, length1, sv1, string2, length2, sv2,
335			start, range, regs, stop, false);
336			}
337			#ifdef _LIBC
338			weak_alias (__re_search_2, re_search_2)
339			#endif
340
341			static regoff_t
342	0		re_search_2_stub (pTHX_ struct re_pattern_buffer *bufp,
343			const char string1, Idx length1, SV sv1,
344			const char string2, Idx length2, SV sv2,
345			Idx start, regoff_t range, struct re_registers *regs,
346			Idx stop, bool ret_len)
347			{
348			const char *str;
349			regoff_t rval;
350	0		Idx len = length1 + length2;
351	0		char *s = NULL;
352
353			/* Although this is an extension from us, never used, we say that utf8 behaviour of the two strings
354			must be the same */
355	0	0	if (BE (length1 < 0 \|\| length2 < 0 \|\| stop < 0 \|\| len < length1 \|\| DO_UTF8(sv1) != DO_UTF8(sv2), 0))
		0
		0
		0
		0
		0
356	0		return -2;
357
358			/* Concatenate the strings. */
359	0	0	if (length2 > 0)
360	0	0	if (length1 > 0)
361			{
362	0		re_malloc (s ,char, len);
363			#ifdef _LIBC
364			CANNOT BE REACHED: memcpy (__mempcpy (s, string1, length1), string2, length2);
365			#else
366	0		Copy (string1, s, length1, char);
367	0		Copy (string2, s + length1, length2, char);
368			#endif
369	0		str = s;
370			}
371			else
372	0		str = string2;
373			else
374	0		str = string1;
375
376			{
377	0		SV *sv_tmp = newSVsv(sv1);
378	0		sv_catsv_mg(sv_tmp, sv2);
379	0		rval = re_search_stub (aTHX_ bufp, str, len, sv_tmp, start, range, stop, regs,
380			ret_len);
381	0		SvREFCNT_dec(sv_tmp);
382			}
383	0	0	re_free (s);
384	0		return rval;
385			}
386
387			/* The parameters have the same meaning as those of re_search.
388			Additional parameters:
389			If RET_LEN is true the length of the match is returned (re_match style);
390			otherwise the position of the match is returned. */
391
392			static regoff_t
393	14		re_search_stub (pTHX_ struct re_pattern_buffer *bufp,
394			const char string, Idx length, SV sv,
395			Idx start, regoff_t range, Idx stop, struct re_registers *regs,
396			bool ret_len)
397			{
398			reg_errcode_t result;
399			regmatch_t *pmatch;
400			Idx nregs;
401			regoff_t rval;
402	14		int eflags = 0;
403	14		re_dfa_t *dfa = bufp->buffer;
404	14		Idx last_start = start + range;
405
406			/* Check for out-of-range. */
407	14	50	if (BE (start < 0 \|\| start > length, 0))
408	0		return -1;
409	14	50	if (BE (length < last_start \|\| (0 <= range && last_start < start), 0))
		50
		50
		50
410	0		last_start = length;
411	14	50	else if (BE (last_start < 0 \|\| (range < 0 && start <= last_start), 0))
		0
412	0		last_start = 0;
413
414			lock_lock (dfa->lock);
415
416	14		eflags \|= (bufp->not_bol) ? REG_NOTBOL : 0;
417	14	50	eflags \|= (bufp->not_eol) ? REG_NOTEOL : 0;
418
419			/* Compile fastmap if we haven't yet. */
420	14	50	if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
		50
		0
421	0		re_compile_fastmap (aTHX_ bufp);
422
423	14	50	if (BE (bufp->no_sub, 0))
424	0		regs = NULL;
425
426			/* We need at least 1 register. */
427	14	50	if (regs == NULL)
428	0		nregs = 1;
429	14	50	else if (BE (bufp->regs_allocated == REGS_FIXED
		0
430			&& regs->num_regs <= bufp->re_nsub, 0))
431			{
432	0		nregs = regs->num_regs;
433	0	0	if (BE (nregs < 1, 0))
434			{
435			/* Nothing can be copied to regs. */
436	0		regs = NULL;
437	0		nregs = 1;
438			}
439			}
440			else
441	14		nregs = bufp->re_nsub + 1;
442
443	14	50	re_malloc (pmatch, regmatch_t, nregs);
444	14		result = re_search_internal (aTHX_ bufp, string, length, sv, start, last_start, stop,
445			nregs, pmatch, eflags);
446
447	14		rval = 0;
448
449			/* I hope we needn't fill their regs with -1's when no match was found. */
450	14	100	if (result != REG_NOERROR)
451	2	50	rval = result == REG_NOMATCH ? -1 : -2;
452	12	50	else if (regs != NULL)
453			{
454			/* If caller wants register contents data back, copy them. */
455	12		bufp->regs_allocated = re_copy_regs (aTHX_ regs, pmatch, nregs,
456	12		bufp->regs_allocated);
457	12	50	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
458	0		rval = -2;
459			}
460
461	14	100	if (BE (rval == 0, 1))
462			{
463	12	50	if (ret_len)
464			{
465			assert (pmatch[0].rm_so == start);
466	0		rval = pmatch[0].rm_eo - start;
467			}
468			else
469	12		rval = pmatch[0].rm_so;
470			}
471	14	50	re_free (pmatch);
472			out:
473			lock_unlock (dfa->lock);
474	14		return rval;
475			}
476
477			static unsigned
478	12		re_copy_regs (pTHX_ struct re_registers regs, regmatch_t pmatch, Idx nregs,
479			int regs_allocated)
480			{
481	12		int rval = REGS_REALLOCATE;
482			Idx i;
483	12		Idx need_regs = nregs + 1;
484			/* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
485			uses. */
486
487			/* Have the register data arrays been allocated? */
488	12	100	if (regs_allocated == REGS_UNALLOCATED)
489			{ /* No. So allocate them with malloc. */
490	5	50	re_malloc (regs->start, regoff_t, need_regs);
491	5	50	re_malloc (regs->end, regoff_t, need_regs);
492	5		regs->num_regs = need_regs;
493			}
494	7	50	else if (regs_allocated == REGS_REALLOCATE)
495			{ /* Yes. If we need more elements than were already
496			allocated, reallocate them. If we need fewer, just
497			leave it alone. */
498	7	50	if (BE (need_regs > regs->num_regs, 0))
499			{
500	7	50	re_realloc (regs->start, regoff_t, need_regs);
501	7	50	re_realloc (regs->end, regoff_t, need_regs);
502	7		regs->num_regs = need_regs;
503			}
504			}
505			else
506			{
507			assert (regs_allocated == REGS_FIXED);
508			/* This function may not be called with REGS_FIXED and nregs too big. */
509			assert (regs->num_regs >= nregs);
510	0		rval = REGS_FIXED;
511			}
512
513			/* Copy the regs. */
514	37	100	for (i = 0; i < nregs; ++i)
515			{
516	25		regs->start[i] = pmatch[i].rm_so;
517	25		regs->end[i] = pmatch[i].rm_eo;
518			}
519	24	100	for ( ; i < regs->num_regs; ++i)
520	12		regs->start[i] = regs->end[i] = -1;
521
522	12		return rval;
523			}
524
525			/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
526			ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
527			this memory for recording register information. STARTS and ENDS
528			must be allocated using the malloc library routine, and must each
529			be at least NUM_REGS * sizeof (regoff_t) bytes long.
530
531			If NUM_REGS == 0, then subsequent matches should allocate their own
532			register data.
533
534			Unless this function is called, the first search or match using
535			PATTERN_BUFFER will allocate its own register data, without
536			freeing the old data. */
537
538			void
539	0		re_set_registers (pTHX_ struct re_pattern_buffer bufp, struct re_registers regs, __re_size_t num_regs, regoff_t starts, regoff_t ends)
540			{
541	0	0	if (num_regs)
542			{
543	0		bufp->regs_allocated = REGS_REALLOCATE;
544	0		regs->num_regs = num_regs;
545	0		regs->start = starts;
546	0		regs->end = ends;
547			}
548			else
549			{
550	0		bufp->regs_allocated = REGS_UNALLOCATED;
551	0		regs->num_regs = 0;
552	0		regs->start = regs->end = NULL;
553			}
554	0		}
555			#ifdef _LIBC
556			weak_alias (__re_set_registers, re_set_registers)
557			#endif
558
559			/* Entry points compatible with 4.2 BSD regex library. We don't define
560			them unless specifically requested. */
561
562			#if defined _REGEX_RE_COMP \|\| defined _LIBC
563			int
564			# ifdef _LIBC
565			weak_function
566			# endif
567			re_exec (s)
568			const char *s;
569			{
570			return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
571			}
572			#endif /* _REGEX_RE_COMP */
573
574			/* Internal entry point. */
575
576			/* Searches for a compiled pattern PREG in the string STRING, whose
577			length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
578			meaning as with regexec. LAST_START is START + RANGE, where
579			START and RANGE have the same meaning as with re_search.
580			Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
581			otherwise return the error code.
582			Note: We assume front end functions already check ranges.
583			(0 <= LAST_START && LAST_START <= LENGTH) */
584
585			static reg_errcode_t
586			__attribute_warn_unused_result__
587	14		re_search_internal (pTHX_ const regex_t *preg,
588			const char string, Idx length, SV sv,
589			Idx start, Idx last_start, Idx stop,
590			size_t nmatch, regmatch_t pmatch[],
591			int eflags)
592			{
593			reg_errcode_t err;
594	14		const re_dfa_t *dfa = preg->buffer;
595			Idx left_lim, right_lim;
596			int incr;
597			bool fl_longest_match;
598			int match_kind;
599			Idx match_first;
600	14		Idx match_last = REG_MISSING;
601			Idx extra_nmatch;
602			bool sb;
603			int ch;
604			#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
605	14		re_match_context_t mctx = { .dfa = dfa };
606			#else
607			re_match_context_t mctx;
608			#endif
609	0	0	char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
610	0	0	&& start != last_start && !preg->can_be_null)
		0
611	14	50	? preg->fastmap : NULL);
612	14		RE_TRANSLATE_TYPE t = preg->translate;
613
614			#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
615			memset (&mctx, '\0', sizeof (re_match_context_t));
616			mctx.dfa = dfa;
617			#endif
618			#ifdef _PERL_I18N
619	14		mctx.sv = sv;
620			#endif
621
622	14	50	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
623	14		nmatch -= extra_nmatch;
624
625			/* Check if the DFA haven't been compiled. */
626	14	50	if (BE (preg->used == 0 \|\| dfa->init_state == NULL
		50
		50
		50
		50
		50
		50
		50
627			\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
628			\|\| dfa->init_state_begbuf == NULL, 0))
629	0		return REG_NOMATCH;
630
631			#ifdef DEBUG
632			/* We assume front-end functions already check them. */
633			assert (0 <= last_start && last_start <= length);
634			#endif
635
636			/* If initial states with non-begbuf contexts have no elements,
637			the regex must be anchored. If preg->newline_anchor is set,
638			we'll never use init_state_nl, so do not check it. */
639	14	50	if (dfa->init_state->nodes.nelem == 0
640	0	0	&& dfa->init_state_word->nodes.nelem == 0
641	0	0	&& (dfa->init_state_nl->nodes.nelem == 0
642	0	0	\|\| !preg->newline_anchor))
643			{
644	0	0	if (start != 0 && last_start != 0)
		0
645	0		return REG_NOMATCH;
646	0		start = last_start = 0;
647			}
648
649			/* We must check the longest matching, if nmatch > 0. */
650	14	50	fl_longest_match = (nmatch != 0 \|\| dfa->nbackref);
		0
651
652	14		err = re_string_allocate (aTHX_ &mctx.input, string, length, /* c.f. comment in mbrtowc / / dfa->nodes_len + 1 */ length + 1,
653	14		preg->translate, (preg->syntax & RE_ICASE) != 0,
654			dfa);
655	14	50	if (BE (err != REG_NOERROR, 0))
656	0		goto free_return;
657	14		mctx.input.stop = stop;
658	14		mctx.input.raw_stop = stop;
659	14		mctx.input.newline_anchor = preg->newline_anchor;
660
661	14		err = match_ctx_init (aTHX_ &mctx, eflags, dfa->nbackref * 2);
662	14	50	if (BE (err != REG_NOERROR, 0))
663	0		goto free_return;
664
665			/* We will log all the DFA states through which the dfa pass,
666			if nmatch > 1, or this dfa has "multibyte node", which is a
667			back-reference or a node which can accept multibyte character or
668			multi character collating element. */
669	14	100	if (nmatch > 1 \|\| dfa->has_mb_node)
		50
670			{
671			/* Avoid overflow. */
672	13	50	if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
673			<= mctx.input.bufs_len), 0))
674			{
675	0		err = REG_ESPACE;
676	0		goto free_return;
677			}
678
679	13	50	re_malloc (mctx.state_log, re_dfastate_t *, mctx.input.bufs_len + 1);
680			}
681			else
682	1		mctx.state_log = NULL;
683
684	14		match_first = start;
685	14	50	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
686			: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
687
688			/* Check incrementally whether the input string matches. */
689	14	50	incr = (last_start < start) ? -1 : 1;
690	14		left_lim = (last_start < start) ? last_start : start;
691	14		right_lim = (last_start < start) ? start : last_start;
692	14		sb = dfa->mb_cur_max == 1;
693	14		match_kind =
694			(fastmap
695	0	0	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? 4 : 0)
		0
		0
696	0	0	\| (start <= last_start ? 2 : 0)
697	0		\| (t != NULL ? 1 : 0))
698	14	50	: 8);
699
700	39		for (;; match_first += incr)
701			{
702	53		err = REG_NOMATCH;
703	53	50	if (match_first < left_lim \|\| right_lim < match_first)
		100
704			goto free_return;
705
706			/* Advance as rapidly as possible through the string, until we
707			find a plausible place to start matching. This may be done
708			with varying efficiency, so there are various possibilities:
709			only the most common of them are specialized, in order to
710			save on code size. We use a switch statement for speed. */
711	51		switch (match_kind)
712			{
713			case 8:
714			/* No fastmap. */
715	51		break;
716
717			case 7:
718			/* Fastmap with single-byte translation, match forward. */
719	0	0	while (BE (match_first < right_lim, 1)
720	0	0	&& !fastmap[t[(unsigned char) string[match_first]]])
721	0		++match_first;
722	0		goto forward_match_found_start_or_reached_end;
723
724			case 6:
725			/* Fastmap without translation, match forward. */
726	0	0	while (BE (match_first < right_lim, 1)
727	0	0	&& !fastmap[(unsigned char) string[match_first]])
728	0		++match_first;
729
730			forward_match_found_start_or_reached_end:
731	0	0	if (BE (match_first == right_lim, 0))
732			{
733	0		ch = match_first >= length
734	0	0	? 0 : (unsigned char) string[match_first];
735	0	0	if (!fastmap[t ? t[ch] : ch])
		0
736	0		goto free_return;
737			}
738	0		break;
739
740			case 4:
741			case 5:
742			/* Fastmap without multi-byte translation, match backwards. */
743	0	0	while (match_first >= left_lim)
744			{
745	0		ch = match_first >= length
746	0	0	? 0 : (unsigned char) string[match_first];
747	0	0	if (fastmap[t ? t[ch] : ch])
		0
748	0		break;
749	0		--match_first;
750			}
751	0	0	if (match_first < left_lim)
752	0		goto free_return;
753	0		break;
754
755			default:
756			/* In this case, we can't determine easily the current byte,
757			since it might be a component byte of a multibyte
758			character. Then we use the constructed buffer instead. */
759			for (;;)
760			{
761			/* If MATCH_FIRST is out of the valid range, reconstruct the
762			buffers. */
763	0		__re_size_t offset = match_first - mctx.input.raw_mbs_idx;
764	0	0	if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
765			{
766	0		err = re_string_reconstruct (aTHX_ &mctx.input, match_first,
767			eflags);
768	0	0	if (BE (err != REG_NOERROR, 0))
769	0		goto free_return;
770
771	0		offset = match_first - mctx.input.raw_mbs_idx;
772			}
773			/* If MATCH_FIRST is out of the buffer, leave it as '\0'.
774			Note that MATCH_FIRST must not be smaller than 0. */
775	0		ch = (match_first >= length
776	0	0	? 0 : re_string_byte_at (&mctx.input, offset));
777	0	0	if (fastmap[ch])
778	0		break;
779	0		match_first += incr;
780	0	0	if (match_first < left_lim \|\| match_first > right_lim)
		0
781			{
782	0		err = REG_NOMATCH;
783	0		goto free_return;
784			}
785	0		}
786	0		break;
787			}
788
789			/* Reconstruct the buffers so that the matcher can assume that
790			the matching starts from the beginning of the buffer. */
791	51		err = re_string_reconstruct (aTHX_ &mctx.input, match_first, eflags);
792	51	50	if (BE (err != REG_NOERROR, 0))
793	0		goto free_return;
794
795			#ifdef RE_ENABLE_I18N
796			/* Don't consider this char as a possible match start if it part,
797			yet isn't the head, of a multibyte character. */
798	51	50	if (!sb && !re_string_first_byte (&mctx.input, 0))
		100
		100
799	16		continue;
800			#endif
801
802			/* It seems to be appropriate one, then use the matcher. */
803			/* We assume that the matching starts from 0. */
804	35		mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
805	35	50	match_last = check_matching (aTHX_ &mctx, fl_longest_match,
806			start <= last_start ? &match_first : NULL);
807	35	100	if (match_last != REG_MISSING)
808			{
809	12	50	if (BE (match_last == REG_ERROR, 0))
810			{
811	0		err = REG_ESPACE;
812	0		goto free_return;
813			}
814			else
815			{
816	12		mctx.match_last = match_last;
817	12	50	if ((!preg->no_sub && nmatch > 1) \|\| dfa->nbackref)
		50
		0
818			{
819	12		re_dfastate_t *pstate = mctx.state_log[match_last];
820	12		mctx.last_node = check_halt_state_context (aTHX_ &mctx, pstate,
821			match_last);
822			}
823	12	50	if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
		50
		100
824	3	50	\|\| dfa->nbackref)
825			{
826	9		err = prune_impossible_nodes (aTHX_ &mctx);
827	9	50	if (err == REG_NOERROR)
828	9		break;
829	0	0	if (BE (err != REG_NOMATCH, 0))
830	0		goto free_return;
831	0		match_last = REG_MISSING;
832			}
833			else
834			break; /* We found a match. */
835			}
836			}
837
838	23		match_ctx_clean (aTHX_ &mctx);
839	39		}
840
841			#ifdef DEBUG
842			assert (match_last != REG_MISSING);
843			assert (err == REG_NOERROR);
844			#endif
845
846			/* Set pmatch[] if we need. */
847	12	50	if (nmatch > 0)
848			{
849			Idx reg_idx;
850
851			/* Initialize registers. */
852	25	100	for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
853	13		pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
854
855			/* Set the points where matching start/end. */
856	12		pmatch[0].rm_so = 0;
857	12		pmatch[0].rm_eo = mctx.match_last;
858			/* FIXME: This function should fail if mctx.match_last exceeds
859			the maximum possible regoff_t value. We need a new error
860			code REG_OVERFLOW. */
861
862	12	50	if (!preg->no_sub && nmatch > 1)
		50
863			{
864	12		err = set_regs (aTHX_ preg, &mctx, nmatch, pmatch,
865	12	100	dfa->has_plural_match && dfa->nbackref > 0);
		50
866	12	50	if (BE (err != REG_NOERROR, 0))
867	0		goto free_return;
868			}
869
870			/* At last, add the offset to each register, since we slid
871			the buffers so that we could assume that the matching starts
872			from 0. */
873	37	100	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
874	25	50	if (pmatch[reg_idx].rm_so != -1)
875			{
876			#ifdef RE_ENABLE_I18N
877	25	50	if (BE (mctx.input.offsets_needed != 0, 0))
878			{
879	0	0	pmatch[reg_idx].rm_so =
880	0		(pmatch[reg_idx].rm_so == mctx.input.valid_len
881	0		? mctx.input.valid_raw_len
882	0		: mctx.input.offsets[pmatch[reg_idx].rm_so]);
883	0	0	pmatch[reg_idx].rm_eo =
884	0		(pmatch[reg_idx].rm_eo == mctx.input.valid_len
885	0		? mctx.input.valid_raw_len
886	0		: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
887			}
888			#else
889			assert (mctx.input.offsets_needed == 0);
890			#endif
891	25		pmatch[reg_idx].rm_so += match_first;
892	25		pmatch[reg_idx].rm_eo += match_first;
893			}
894	12	50	for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
895			{
896	0		pmatch[nmatch + reg_idx].rm_so = -1;
897	0		pmatch[nmatch + reg_idx].rm_eo = -1;
898			}
899
900	12	50	if (dfa->subexp_map)
901	0	0	for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
902	0	0	if (dfa->subexp_map[reg_idx] != reg_idx)
903			{
904	0		pmatch[reg_idx + 1].rm_so
905	0		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
906	0		pmatch[reg_idx + 1].rm_eo
907	0		= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
908			}
909			}
910
911			free_return:
912	14	100	re_free (mctx.state_log);
913	14	50	if (dfa->nbackref)
914	0		match_ctx_free (aTHX_ &mctx);
915	14		re_string_destruct (aTHX_ &mctx.input);
916	14		return err;
917			}
918
919			static reg_errcode_t
920			__attribute_warn_unused_result__
921	9		prune_impossible_nodes (pTHX_ re_match_context_t *mctx)
922			{
923	9		const re_dfa_t *const dfa = mctx->dfa;
924			Idx halt_node, match_last;
925			reg_errcode_t ret;
926			re_dfastate_t **sifted_states;
927	9		re_dfastate_t **lim_states = NULL;
928			re_sift_context_t sctx;
929			#ifdef DEBUG
930			assert (mctx->state_log != NULL);
931			#endif
932	9		match_last = mctx->match_last;
933	9		halt_node = mctx->last_node;
934
935			/* Avoid overflow. */
936	9	50	if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0))
937	0		return REG_ESPACE;
938
939	9	50	re_malloc (sifted_states, re_dfastate_t *, match_last + 1);
940	9	50	if (dfa->nbackref)
941			{
942	0	0	re_malloc (lim_states, re_dfastate_t *, match_last + 1);
943			while (1)
944			{
945	0		memset (lim_states, '\0',
946			sizeof (re_dfastate_t ) (match_last + 1));
947	0		sift_ctx_init (aTHX_ &sctx, sifted_states, lim_states, halt_node,
948			match_last);
949	0		ret = sift_states_backward (aTHX_ mctx, &sctx);
950	0	0	re_node_set_free (&sctx.limits);
951	0	0	if (BE (ret != REG_NOERROR, 0))
952	0		goto free_return;
953	0	0	if (sifted_states[0] != NULL \|\| lim_states[0] != NULL)
		0
954			break;
955			do
956			{
957	0		--match_last;
958	0	0	if (! REG_VALID_INDEX (match_last))
959			{
960	0		ret = REG_NOMATCH;
961	0		goto free_return;
962			}
963	0		} while (mctx->state_log[match_last] == NULL
964	0	0	\|\| !mctx->state_log[match_last]->halt);
		0
965	0		halt_node = check_halt_state_context (aTHX_ mctx,
966	0		mctx->state_log[match_last],
967			match_last);
968	0		}
969	0		ret = merge_state_array (aTHX_ dfa, sifted_states, lim_states,
970			match_last + 1);
971	0	0	re_free (lim_states);
972	0		lim_states = NULL;
973	0	0	if (BE (ret != REG_NOERROR, 0))
974	0		goto free_return;
975			}
976			else
977			{
978	9		sift_ctx_init (aTHX_ &sctx, sifted_states, lim_states, halt_node, match_last);
979	9		ret = sift_states_backward (aTHX_ mctx, &sctx);
980	9	50	re_node_set_free (&sctx.limits);
981	9	50	if (BE (ret != REG_NOERROR, 0))
982	0		goto free_return;
983	9	50	if (sifted_states[0] == NULL)
984			{
985	0		ret = REG_NOMATCH;
986	0		goto free_return;
987			}
988			}
989	9	50	re_free (mctx->state_log);
990	9		mctx->state_log = sifted_states;
991	9		sifted_states = NULL;
992	9		mctx->last_node = halt_node;
993	9		mctx->match_last = match_last;
994	9		ret = REG_NOERROR;
995			free_return:
996	9	50	re_free (sifted_states);
997	9	50	re_free (lim_states);
998	9		return ret;
999			}
1000
1001			/* Acquire an initial state and return it.
1002			We must select appropriate initial state depending on the context,
1003			since initial states may have constraints like "\<", "^", etc.. */
1004
1005			static inline re_dfastate_t *
1006			__attribute__ ((always_inline)) internal_function
1007			acquire_init_state_context (pTHX_ reg_errcode_t err, const re_match_context_t mctx,
1008			Idx idx)
1009			{
1010	35		const re_dfa_t *const dfa = mctx->dfa;
1011	35	50	if (dfa->init_state->has_constraint)
1012			{
1013			unsigned int context;
1014	0		context = re_string_context_at (aTHX_ &mctx->input, idx - 1, mctx->eflags);
1015	0	0	if (IS_WORD_CONTEXT (context))
1016	0		return dfa->init_state_word;
1017	0	0	else if (IS_ORDINARY_CONTEXT (context))
1018	0		return dfa->init_state;
1019	0	0	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
		0
1020	0		return dfa->init_state_begbuf;
1021	0	0	else if (IS_NEWLINE_CONTEXT (context))
1022	0		return dfa->init_state_nl;
1023	0	0	else if (IS_BEGBUF_CONTEXT (context))
1024			{
1025			/* It is relatively rare case, then calculate on demand. */
1026	0		return re_acquire_state_context (aTHX_ err, dfa,
1027	0		dfa->init_state->entrance_nodes,
1028			context);
1029			}
1030			else
1031			/* Must not happen? */
1032	0		return dfa->init_state;
1033			}
1034			else
1035	35		return dfa->init_state;
1036			}
1037
1038			/* Check whether the regular expression match input string INPUT or not,
1039			and return the index where the matching end. Return REG_MISSING if
1040			there is no match, and return REG_ERROR in case of an error.
1041			FL_LONGEST_MATCH means we want the POSIX longest matching.
1042			If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1043			next place where we may want to try matching.
1044			Note that the matcher assumes that the matching starts from the current
1045			index of the buffer. */
1046
1047			static Idx
1048			internal_function __attribute_warn_unused_result__
1049	35		check_matching (pTHX_ re_match_context_t *mctx, bool fl_longest_match,
1050			Idx *p_match_first)
1051			{
1052	35		const re_dfa_t *const dfa = mctx->dfa;
1053			reg_errcode_t err;
1054	35		Idx match = 0;
1055	35		Idx match_last = REG_MISSING;
1056	35		Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1057			re_dfastate_t *cur_state;
1058	35		bool at_init_state = p_match_first != NULL;
1059	35		Idx next_start_idx = cur_str_idx;
1060
1061	35		err = REG_NOERROR;
1062	35		cur_state = acquire_init_state_context (aTHX_ &err, mctx, cur_str_idx);
1063			/* An initial state must not be NULL (invalid). */
1064	35	50	if (BE (cur_state == NULL, 0))
1065			{
1066			assert (err == REG_ESPACE);
1067	0		return REG_ERROR;
1068			}
1069
1070	35	100	if (mctx->state_log != NULL)
1071			{
1072	21		mctx->state_log[cur_str_idx] = cur_state;
1073
1074			/* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1075			later. E.g. Processing back references. */
1076	21	50	if (BE (dfa->nbackref, 0))
1077			{
1078	0		at_init_state = false;
1079	0		err = check_subexp_matching_top (aTHX_ mctx, &cur_state->nodes, 0);
1080	0	0	if (BE (err != REG_NOERROR, 0))
1081	0		return err;
1082
1083	0	0	if (cur_state->has_backref)
1084			{
1085	0		err = transit_state_bkref (aTHX_ mctx, &cur_state->nodes);
1086	0	0	if (BE (err != REG_NOERROR, 0))
1087	0		return err;
1088			}
1089			}
1090			}
1091
1092			/* If the RE accepts NULL string. */
1093	35	50	if (BE (cur_state->halt, 0))
1094			{
1095	0	0	if (!cur_state->has_constraint
1096	0	0	\|\| check_halt_state_context (aTHX_ mctx, cur_state, cur_str_idx))
1097			{
1098	0	0	if (!fl_longest_match)
1099	0		return cur_str_idx;
1100			else
1101			{
1102	0		match_last = cur_str_idx;
1103	0		match = 1;
1104			}
1105			}
1106			}
1107
1108	66	100	while (!re_string_eoi (&mctx->input))
1109			{
1110	61		re_dfastate_t *old_state = cur_state;
1111	61		Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1112
1113	61	50	if ((BE (next_char_idx >= mctx->input.bufs_len, 0)
1114	0	0	&& mctx->input.bufs_len < mctx->input.len)
1115	61	100	\|\| (BE (next_char_idx >= mctx->input.valid_len, 0)
1116	7	50	&& mctx->input.valid_len < mctx->input.len))
1117			{
1118	0		err = extend_buffers (aTHX_ mctx, next_char_idx + 1);
1119	0	0	if (BE (err != REG_NOERROR, 0))
1120			{
1121			assert (err == REG_ESPACE);
1122	0		return REG_ERROR;
1123			}
1124			}
1125
1126	61		cur_state = transit_state (aTHX_ &err, mctx, cur_state);
1127	61	100	if (mctx->state_log != NULL)
1128	46		cur_state = merge_state_with_log (aTHX_ &err, mctx, cur_state);
1129
1130	61	100	if (cur_state == NULL)
1131			{
1132			/* Reached the invalid state or an error. Try to recover a valid
1133			state using the state log, if available and if we have not
1134			already found a valid (even if not the longest) match. */
1135	46	50	if (BE (err != REG_NOERROR, 0))
1136	0		return REG_ERROR;
1137
1138	46	100	if (mctx->state_log == NULL
1139	33	100	\|\| (match && !fl_longest_match)
		50
1140	33	100	\|\| (cur_state = find_recover_state (aTHX_ &err, mctx)) == NULL)
1141			break;
1142			}
1143
1144	31	100	if (BE (at_init_state, 0))
1145			{
1146	21	50	if (old_state == cur_state)
1147	0		next_start_idx = next_char_idx;
1148			else
1149	21		at_init_state = false;
1150			}
1151
1152	31	100	if (cur_state->halt)
1153			{
1154			/* Reached a halt state.
1155			Check the halt state can satisfy the current context. */
1156	12	50	if (!cur_state->has_constraint
1157	0	0	\|\| check_halt_state_context (aTHX_ mctx, cur_state,
1158			re_string_cur_idx (&mctx->input)))
1159			{
1160			/* We found an appropriate halt state. */
1161	12		match_last = re_string_cur_idx (aTHX_ &mctx->input);
1162	12		match = 1;
1163
1164			/* We found a match, do not modify match_first below. */
1165	12		p_match_first = NULL;
1166	12	50	if (!fl_longest_match)
1167	0		break;
1168			}
1169			}
1170			}
1171
1172	35	100	if (p_match_first)
1173	23		*p_match_first += next_start_idx;
1174
1175	35		return match_last;
1176			}
1177
1178			/* Check NODE match the current context. */
1179
1180			static bool
1181			internal_function
1182	21		check_halt_node_context (pTHX_ const re_dfa_t *dfa, Idx node, unsigned int context)
1183			{
1184	21		re_token_type_t type = dfa->nodes[node].type;
1185	21		unsigned int constraint = dfa->nodes[node].constraint;
1186	21	100	if (type != END_OF_RE)
1187	9		return false;
1188	12	50	if (!constraint)
1189	12		return true;
1190	0	0	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
		0
		0
		0
		0
		0
		0
		0
1191	0		return false;
1192	0		return true;
1193			}
1194
1195			/* Check the halt state STATE match the current context.
1196			Return 0 if not match, if the node, STATE has, is a halt node and
1197			match the context, return the node. */
1198
1199			static Idx
1200			internal_function
1201	12		check_halt_state_context (pTHX_ const re_match_context_t *mctx,
1202			const re_dfastate_t *state, Idx idx)
1203			{
1204			Idx i;
1205			unsigned int context;
1206			#ifdef DEBUG
1207			assert (state->halt);
1208			#endif
1209	12		context = re_string_context_at (aTHX_ &mctx->input, idx, mctx->eflags);
1210	21	50	for (i = 0; i < state->nodes.nelem; ++i)
1211	21	100	if (check_halt_node_context (aTHX_ mctx->dfa, state->nodes.elems[i], context))
1212	12		return state->nodes.elems[i];
1213	0		return 0;
1214			}
1215
1216			/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1217			corresponding to the DFA).
1218			Return the destination node, and update EPS_VIA_NODES;
1219			return REG_MISSING in case of errors. */
1220
1221			static Idx
1222			internal_function
1223	57		proceed_next_node (pTHX_ const re_match_context_t mctx, Idx nregs, regmatch_t regs,
1224			Idx pidx, Idx node, re_node_set eps_via_nodes,
1225			struct re_fail_stack_t *fs)
1226			{
1227	57		const re_dfa_t *const dfa = mctx->dfa;
1228			Idx i;
1229			bool ok;
1230	57	100	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1231			{
1232	35		re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1233	35		re_node_set *edests = &dfa->edests[node];
1234			Idx dest_node;
1235	35		ok = re_node_set_insert (aTHX_ eps_via_nodes, node);
1236	35	50	if (BE (! ok, 0))
1237	0		return REG_ERROR;
1238			/* Pick up a valid destination, or return REG_MISSING if none
1239			is found. */
1240	79	100	for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
1241			{
1242	44		Idx candidate = edests->elems[i];
1243	44	100	if (!re_node_set_contains (aTHX_ cur_nodes, candidate))
1244	9		continue;
1245	35	50	if (dest_node == REG_MISSING)
1246	35		dest_node = candidate;
1247
1248			else
1249			{
1250			/* In order to avoid infinite loop like "(a)", return the second
1251			epsilon-transition if the first was already considered. */
1252	0	0	if (re_node_set_contains (aTHX_ eps_via_nodes, dest_node))
1253	0		return candidate;
1254
1255			/* Otherwise, push the second epsilon-transition on the fail stack. */
1256	0	0	else if (fs != NULL
1257	0	0	&& push_fail_stack (aTHX_ fs, *pidx, candidate, nregs, regs,
1258			eps_via_nodes))
1259	0		return REG_ERROR;
1260
1261			/* We know we are going to exit. */
1262	0		break;
1263			}
1264			}
1265	35		return dest_node;
1266			}
1267			else
1268			{
1269	22		Idx naccepted = 0;
1270	22		re_token_type_t type = dfa->nodes[node].type;
1271
1272			#ifdef RE_ENABLE_I18N
1273	22	100	if (dfa->nodes[node].accept_mb)
1274	9		naccepted = check_node_accept_bytes (aTHX_ dfa, node, &mctx->input, mctx->sv, *pidx);
1275			else
1276			#endif /* RE_ENABLE_I18N */
1277	13	50	if (type == OP_BACK_REF)
1278			{
1279	0		Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1280	0		naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1281	0	0	if (fs != NULL)
1282			{
1283	0	0	if (regs[subexp_idx].rm_so == -1 \|\| regs[subexp_idx].rm_eo == -1)
		0
1284	0		return REG_MISSING;
1285	0	0	else if (naccepted)
1286			{
1287	0		char buf = (char ) re_string_get_buffer (aTHX_ &mctx->input);
1288	0	0	if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1289			naccepted) != 0)
1290	0		return REG_MISSING;
1291			}
1292			}
1293
1294	0	0	if (naccepted == 0)
1295			{
1296			Idx dest_node;
1297	0		ok = re_node_set_insert (aTHX_ eps_via_nodes, node);
1298	0	0	if (BE (! ok, 0))
1299	0		return REG_ERROR;
1300	0		dest_node = dfa->edests[node].elems[0];
1301	0	0	if (re_node_set_contains (aTHX_ &mctx->state_log[*pidx]->nodes,
1302			dest_node))
1303	0		return dest_node;
1304			}
1305			}
1306
1307	22	100	if (naccepted != 0
1308	13	50	\|\| check_node_accept (aTHX_ mctx, dfa->nodes + node, *pidx))
1309			{
1310	22		Idx dest_node = dfa->nexts[node];
1311	22	100	pidx = (naccepted == 0) ? pidx + 1 : *pidx + naccepted;
1312	22	50	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
		0
		0
1313	0	0	\|\| !re_node_set_contains (aTHX_ &mctx->state_log[*pidx]->nodes,
1314			dest_node)))
1315	0		return REG_MISSING;
1316	22		re_node_set_empty (aTHX_ eps_via_nodes);
1317	22		return dest_node;
1318			}
1319			}
1320	0		return REG_MISSING;
1321			}
1322
1323			static reg_errcode_t
1324			internal_function __attribute_warn_unused_result__
1325	0		push_fail_stack (pTHX_ struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1326			Idx nregs, regmatch_t regs, re_node_set eps_via_nodes)
1327			{
1328			reg_errcode_t err;
1329	0		Idx num = fs->num++;
1330	0	0	if (fs->num == fs->alloc)
1331			{
1332			/* new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) * fs->alloc * 2)); */
1333	0	0	Renew(fs->stack, (sizeof (struct re_fail_stack_ent_t) * fs->alloc * 2), struct re_fail_stack_ent_t);
1334	0		fs->alloc *= 2;
1335			}
1336	0		fs->stack[num].idx = str_idx;
1337	0		fs->stack[num].node = dest_node;
1338	0	0	re_malloc (fs->stack[num].regs, regmatch_t, nregs);
1339	0	0	Copy (regs, fs->stack[num].regs, nregs, regmatch_t);
1340	0		err = re_node_set_init_copy (aTHX_ &fs->stack[num].eps_via_nodes, eps_via_nodes);
1341	0		return err;
1342			}
1343
1344			static Idx
1345			internal_function
1346	0		pop_fail_stack (pTHX_ struct re_fail_stack_t fs, Idx pidx, Idx nregs,
1347			regmatch_t regs, re_node_set eps_via_nodes)
1348			{
1349	0		Idx num = --fs->num;
1350			assert (REG_VALID_INDEX (num));
1351	0		*pidx = fs->stack[num].idx;
1352	0	0	Copy (fs->stack[num].regs, regs, nregs, regmatch_t);
1353	0	0	re_node_set_free (eps_via_nodes);
1354	0	0	re_free (fs->stack[num].regs);
1355	0		*eps_via_nodes = fs->stack[num].eps_via_nodes;
1356	0		return fs->stack[num].node;
1357			}
1358
1359			/* Set the positions where the subexpressions are starts/ends to registers
1360			PMATCH.
1361			Note: We assume that pmatch[0] is already set, and
1362			pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1363
1364			static reg_errcode_t
1365			internal_function __attribute_warn_unused_result__
1366	12		set_regs (pTHX_ const regex_t preg, const re_match_context_t mctx, size_t nmatch,
1367			regmatch_t *pmatch, bool fl_backtrack)
1368			{
1369	12		const re_dfa_t *dfa = preg->buffer;
1370			Idx idx, cur_node;
1371			re_node_set eps_via_nodes;
1372			struct re_fail_stack_t *fs;
1373	12		struct re_fail_stack_t fs_body = { 0, 2, NULL };
1374			regmatch_t *prev_idx_match;
1375	12		bool prev_idx_match_malloced = false;
1376
1377			#ifdef DEBUG
1378			assert (nmatch > 1);
1379			assert (mctx->state_log != NULL);
1380			#endif
1381	12	50	if (fl_backtrack)
1382			{
1383	0		fs = &fs_body;
1384	0	0	re_malloc (fs->stack, struct re_fail_stack_ent_t, fs->alloc);
1385			}
1386			else
1387	12		fs = NULL;
1388
1389	12		cur_node = dfa->init_node;
1390	12		re_node_set_init_empty (&eps_via_nodes);
1391
1392			if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1393			prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
1394			else
1395			{
1396	12	50	re_malloc (prev_idx_match, regmatch_t, nmatch);
1397	12		prev_idx_match_malloced = true;
1398			}
1399	12	50	Copy (pmatch, prev_idx_match, nmatch, regmatch_t);
1400
1401	69	50	for (idx = (Idx)pmatch[0].rm_so; idx <= (Idx)pmatch[0].rm_eo ;)
1402			{
1403	69		update_regs (aTHX_ dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1404
1405	69	100	if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
		100
1406			{
1407			Idx reg_idx;
1408	12	50	if (fs)
1409			{
1410	0	0	for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1411	0	0	if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
		0
1412	0		break;
1413	0	0	if (reg_idx == nmatch)
1414			{
1415	0	0	re_node_set_free (&eps_via_nodes);
1416	0	0	if (prev_idx_match_malloced)
1417	0	0	re_free (prev_idx_match);
1418	0		return free_fail_stack_return (aTHX_ fs);
1419			}
1420	0		cur_node = pop_fail_stack (aTHX_ fs, &idx, nmatch, pmatch,
1421			&eps_via_nodes);
1422			}
1423			else
1424			{
1425	12	50	re_node_set_free (&eps_via_nodes);
1426	12	50	if (prev_idx_match_malloced)
1427	12	50	re_free (prev_idx_match);
1428	12		return REG_NOERROR;
1429			}
1430			}
1431
1432			/* Proceed to next node. */
1433	57		cur_node = proceed_next_node (aTHX_ mctx, nmatch, pmatch, &idx, cur_node,
1434			&eps_via_nodes, fs);
1435
1436	57	50	if (BE (! REG_VALID_INDEX (cur_node), 0))
1437			{
1438	0	0	if (BE (cur_node == REG_ERROR, 0))
1439			{
1440	0	0	re_node_set_free (&eps_via_nodes);
1441	0	0	if (prev_idx_match_malloced)
1442	0	0	re_free (prev_idx_match);
1443	0		free_fail_stack_return (aTHX_ fs);
1444	0		return REG_ESPACE;
1445			}
1446	0	0	if (fs)
1447	0		cur_node = pop_fail_stack (aTHX_ fs, &idx, nmatch, pmatch,
1448			&eps_via_nodes);
1449			else
1450			{
1451	0	0	re_node_set_free (&eps_via_nodes);
1452	0	0	if (prev_idx_match_malloced)
1453	0	0	re_free (prev_idx_match);
1454	0		return REG_NOMATCH;
1455			}
1456			}
1457			}
1458	0	0	re_node_set_free (&eps_via_nodes);
1459	0	0	if (prev_idx_match_malloced)
1460	0	0	re_free (prev_idx_match);
1461	12		return free_fail_stack_return (aTHX_ fs);
1462			}
1463
1464			static reg_errcode_t
1465			internal_function
1466	0		free_fail_stack_return (pTHX_ struct re_fail_stack_t *fs)
1467			{
1468	0	0	if (fs)
1469			{
1470			Idx fs_idx;
1471	0	0	for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1472			{
1473	0	0	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1474	0	0	re_free (fs->stack[fs_idx].regs);
1475			}
1476	0	0	re_free (fs->stack);
1477			}
1478	0		return REG_NOERROR;
1479			}
1480
1481			static void
1482			internal_function
1483	69		update_regs (pTHX_ const re_dfa_t dfa, regmatch_t pmatch,
1484			regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1485			{
1486	69		int type = dfa->nodes[cur_node].type;
1487	69	100	if (type == OP_OPEN_SUBEXP)
1488			{
1489	13		Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1490
1491			/* We are at the first node of this sub expression. */
1492	13	50	if (reg_num < nmatch)
1493			{
1494	13		pmatch[reg_num].rm_so = cur_idx;
1495	13		pmatch[reg_num].rm_eo = -1;
1496			}
1497			}
1498	56	100	else if (type == OP_CLOSE_SUBEXP)
1499			{
1500	13		Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1501	13	50	if (reg_num < nmatch)
1502			{
1503			/* We are at the last node of this sub expression. */
1504	13	50	if ((Idx)pmatch[reg_num].rm_so < cur_idx)
1505			{
1506	13		pmatch[reg_num].rm_eo = cur_idx;
1507			/* This is a non-empty match or we are not inside an optional
1508			subexpression. Accept this right away. */
1509	13	50	Copy (pmatch, prev_idx_match, nmatch, regmatch_t);
1510			}
1511			else
1512			{
1513	0	0	if (dfa->nodes[cur_node].opt_subexp
1514	0	0	&& prev_idx_match[reg_num].rm_so != -1)
1515			/* We transited through an empty match for an optional
1516			subexpression, like (a?)*, and this is not the subexp's
1517			first match. Copy back the old content of the registers
1518			so that matches of an inner subexpression are undone as
1519			well, like in ((a?)). /
1520	0	0	Copy (prev_idx_match, pmatch, nmatch, regmatch_t);
1521			else
1522			/* We completed a subexpression, but it may be part of
1523			an optional one, so do not update PREV_IDX_MATCH. */
1524	0		pmatch[reg_num].rm_eo = cur_idx;
1525			}
1526			}
1527			}
1528	69		}
1529
1530			/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1531			and sift the nodes in each states according to the following rules.
1532			Updated state_log will be wrote to STATE_LOG.
1533
1534			Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1535			1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1536			If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1537			the LAST_NODE, we throw away the node 'a'.
1538			2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1539			string 's' and transit to 'b':
1540			i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1541			away the node 'a'.
1542			ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1543			thrown away, we throw away the node 'a'.
1544			3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1545			i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1546			node 'a'.
1547			ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1548			we throw away the node 'a'. */
1549
1550			#define STATE_NODE_CONTAINS(state,node) \
1551			((state) != NULL && re_node_set_contains (aTHX_ &(state)->nodes, node))
1552
1553			static reg_errcode_t
1554			internal_function
1555	9		sift_states_backward (pTHX_ const re_match_context_t mctx, re_sift_context_t sctx)
1556			{
1557			reg_errcode_t err;
1558	9		int null_cnt = 0;
1559	9		Idx str_idx = sctx->last_str_idx;
1560			re_node_set cur_dest;
1561
1562			#ifdef DEBUG
1563			assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1564			#endif
1565
1566			/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1567			transit to the last_node and the last_node itself. */
1568	9		err = re_node_set_init_1 (aTHX_ &cur_dest, sctx->last_node);
1569	9	50	if (BE (err != REG_NOERROR, 0))
1570	0		return err;
1571	9		err = update_cur_sifted_state (aTHX_ mctx, sctx, str_idx, &cur_dest);
1572	9	50	if (BE (err != REG_NOERROR, 0))
1573	0		goto free_return;
1574
1575			/* Then check each states in the state_log. */
1576	37	100	while (str_idx > 0)
1577			{
1578			/* Update counters. */
1579	28	100	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1580	28	50	if (null_cnt > mctx->max_mb_elem_len)
1581			{
1582	0		memset (sctx->sifted_states, '\0',
1583			sizeof (re_dfastate_t ) str_idx);
1584	0	0	re_node_set_free (&cur_dest);
1585	0		return REG_NOERROR;
1586			}
1587	28		re_node_set_empty (aTHX_ &cur_dest);
1588	28		--str_idx;
1589
1590	28	100	if (mctx->state_log[str_idx])
1591			{
1592	10		err = build_sifted_states (aTHX_ mctx, sctx, str_idx, &cur_dest);
1593	10	50	if (BE (err != REG_NOERROR, 0))
1594	0		goto free_return;
1595			}
1596
1597			/* Add all the nodes which satisfy the following conditions:
1598			- It can epsilon transit to a node in CUR_DEST.
1599			- It is in CUR_SRC.
1600			And update state_log. */
1601	28		err = update_cur_sifted_state (aTHX_ mctx, sctx, str_idx, &cur_dest);
1602	28	50	if (BE (err != REG_NOERROR, 0))
1603	0		goto free_return;
1604			}
1605	9		err = REG_NOERROR;
1606			free_return:
1607	9	50	re_node_set_free (&cur_dest);
1608	9		return err;
1609			}
1610
1611			static reg_errcode_t
1612			internal_function __attribute_warn_unused_result__
1613	10		build_sifted_states (pTHX_ const re_match_context_t mctx, re_sift_context_t sctx,
1614			Idx str_idx, re_node_set *cur_dest)
1615			{
1616	10		const re_dfa_t *const dfa = mctx->dfa;
1617	10		const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1618			Idx i;
1619
1620			/* Then build the next sifted state.
1621			We build the next sifted state on 'cur_dest', and update
1622			'sifted_states[str_idx]' with 'cur_dest'.
1623			Note:
1624			'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1625			'cur_src' points the node_set of the old 'state_log[str_idx]'
1626			(with the epsilon nodes pre-filtered out). */
1627	29	100	for (i = 0; i < cur_src->nelem; i++)
1628			{
1629	19		Idx prev_node = cur_src->elems[i];
1630	19		int naccepted = 0;
1631			bool ok;
1632
1633			#ifdef DEBUG
1634			re_token_type_t type = dfa->nodes[prev_node].type;
1635			assert (!IS_EPSILON_NODE (type));
1636			#endif
1637			#ifdef RE_ENABLE_I18N
1638			/* If the node may accept "multi byte". */
1639	19	100	if (dfa->nodes[prev_node].accept_mb)
1640	9		naccepted = sift_states_iter_mb (aTHX_ mctx, sctx, prev_node,
1641			str_idx, sctx->last_str_idx);
1642			#endif /* RE_ENABLE_I18N */
1643
1644			/* We don't check backreferences here.
1645			See update_cur_sifted_state(). */
1646	19	100	if (!naccepted
1647	10	100	&& check_node_accept (aTHX_ mctx, dfa->nodes + prev_node, str_idx)
1648	1	50	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
		50
1649			dfa->nexts[prev_node]))
1650	1		naccepted = 1;
1651
1652	19	100	if (naccepted == 0)
1653	9		continue;
1654
1655	10	50	if (sctx->limits.nelem)
1656			{
1657	0		Idx to_idx = str_idx + naccepted;
1658	0	0	if (check_dst_limits (aTHX_ mctx, &sctx->limits,
1659	0		dfa->nexts[prev_node], to_idx,
1660			prev_node, str_idx))
1661	0		continue;
1662			}
1663	10		ok = re_node_set_insert (aTHX_ cur_dest, prev_node);
1664	10	50	if (BE (! ok, 0))
1665	0		return REG_ESPACE;
1666			}
1667
1668	10		return REG_NOERROR;
1669			}
1670
1671			/* Helper functions. */
1672
1673			static reg_errcode_t
1674			internal_function
1675	16		clean_state_log_if_needed (pTHX_ re_match_context_t *mctx, Idx next_state_log_idx)
1676			{
1677	16		Idx top = mctx->state_log_top;
1678
1679	16	50	if ((next_state_log_idx >= mctx->input.bufs_len
1680	0	0	&& mctx->input.bufs_len < mctx->input.len)
1681	16	50	\|\| (next_state_log_idx >= mctx->input.valid_len
1682	0	0	&& mctx->input.valid_len < mctx->input.len))
1683			{
1684			reg_errcode_t err;
1685	0		err = extend_buffers (aTHX_ mctx, next_state_log_idx + 1);
1686	0	0	if (BE (err != REG_NOERROR, 0))
1687	0		return err;
1688			}
1689
1690	16	50	if (top < next_state_log_idx)
1691			{
1692	16		memset (mctx->state_log + top + 1, '\0',
1693	16		sizeof (re_dfastate_t ) (next_state_log_idx - top));
1694	16		mctx->state_log_top = next_state_log_idx;
1695			}
1696	16		return REG_NOERROR;
1697			}
1698
1699			static reg_errcode_t
1700			internal_function
1701	0		merge_state_array (pTHX_ const re_dfa_t dfa, re_dfastate_t *dst,
1702			re_dfastate_t **src, Idx num)
1703			{
1704			Idx st_idx;
1705			reg_errcode_t err;
1706	0	0	for (st_idx = 0; st_idx < num; ++st_idx)
1707			{
1708	0	0	if (dst[st_idx] == NULL)
1709	0		dst[st_idx] = src[st_idx];
1710	0	0	else if (src[st_idx] != NULL)
1711			{
1712			re_node_set merged_set;
1713	0		err = re_node_set_init_union (aTHX_ &merged_set, &dst[st_idx]->nodes,
1714	0		&src[st_idx]->nodes);
1715	0	0	if (BE (err != REG_NOERROR, 0))
1716	0		return err;
1717	0		dst[st_idx] = re_acquire_state (aTHX_ &err, dfa, &merged_set);
1718	0	0	re_node_set_free (&merged_set);
1719	0	0	if (BE (err != REG_NOERROR, 0))
1720	0		return err;
1721			}
1722			}
1723	0		return REG_NOERROR;
1724			}
1725
1726			static reg_errcode_t
1727			internal_function
1728	37		update_cur_sifted_state (pTHX_ const re_match_context_t *mctx,
1729			re_sift_context_t *sctx, Idx str_idx,
1730			re_node_set *dest_nodes)
1731			{
1732	37		const re_dfa_t *const dfa = mctx->dfa;
1733	37		reg_errcode_t err = REG_NOERROR;
1734			const re_node_set *candidates;
1735	74		candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1736	37	100	: &mctx->state_log[str_idx]->nodes);
1737
1738	37	100	if (dest_nodes->nelem == 0)
1739	18		sctx->sifted_states[str_idx] = NULL;
1740			else
1741			{
1742	19	50	if (candidates)
1743			{
1744			/* At first, add the nodes which can epsilon transit to a node in
1745			DEST_NODE. */
1746	19		err = add_epsilon_src_nodes (aTHX_ dfa, dest_nodes, candidates);
1747	19	50	if (BE (err != REG_NOERROR, 0))
1748	0		return err;
1749
1750			/* Then, check the limitations in the current sift_context. */
1751	19	50	if (sctx->limits.nelem)
1752			{
1753	0		err = check_subexp_limits (aTHX_ dfa, dest_nodes, candidates, &sctx->limits,
1754			mctx->bkref_ents, str_idx);
1755	0	0	if (BE (err != REG_NOERROR, 0))
1756	0		return err;
1757			}
1758			}
1759
1760	19		sctx->sifted_states[str_idx] = re_acquire_state (aTHX_ &err, dfa, dest_nodes);
1761	19	50	if (BE (err != REG_NOERROR, 0))
1762	0		return err;
1763			}
1764
1765	37	100	if (candidates && mctx->state_log[str_idx]->has_backref)
		50
1766			{
1767	0		err = sift_states_bkref (aTHX_ mctx, sctx, str_idx, candidates);
1768	0	0	if (BE (err != REG_NOERROR, 0))
1769	0		return err;
1770			}
1771	37		return REG_NOERROR;
1772			}
1773
1774			static reg_errcode_t
1775			internal_function __attribute_warn_unused_result__
1776	19		add_epsilon_src_nodes (pTHX_ const re_dfa_t dfa, re_node_set dest_nodes,
1777			const re_node_set *candidates)
1778			{
1779	19		reg_errcode_t err = REG_NOERROR;
1780			Idx i;
1781
1782	19		re_dfastate_t *state = re_acquire_state (aTHX_ &err, dfa, dest_nodes);
1783	19	50	if (BE (err != REG_NOERROR, 0))
1784	0		return err;
1785
1786	19	100	if (!state->inveclosure.alloc)
1787			{
1788	5		err = re_node_set_alloc (aTHX_ &state->inveclosure, dest_nodes->nelem);
1789	5	50	if (BE (err != REG_NOERROR, 0))
1790	0		return REG_ESPACE;
1791	10	100	for (i = 0; i < dest_nodes->nelem; i++)
1792			{
1793	5		err = re_node_set_merge (aTHX_ &state->inveclosure,
1794	5		dfa->inveclosures + dest_nodes->elems[i]);
1795	5	50	if (BE (err != REG_NOERROR, 0))
1796	0		return REG_ESPACE;
1797			}
1798			}
1799	19		return re_node_set_add_intersect (aTHX_ dest_nodes, candidates,
1800	19		&state->inveclosure);
1801			}
1802
1803			static reg_errcode_t
1804			internal_function
1805	0		sub_epsilon_src_nodes (pTHX_ const re_dfa_t dfa, Idx node, re_node_set dest_nodes,
1806			const re_node_set *candidates)
1807			{
1808			Idx ecl_idx;
1809			reg_errcode_t err;
1810	0		re_node_set *inv_eclosure = dfa->inveclosures + node;
1811			re_node_set except_nodes;
1812	0		re_node_set_init_empty (&except_nodes);
1813	0	0	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1814			{
1815	0		Idx cur_node = inv_eclosure->elems[ecl_idx];
1816	0	0	if (cur_node == node)
1817	0		continue;
1818	0	0	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1819			{
1820	0		Idx edst1 = dfa->edests[cur_node].elems[0];
1821	0		Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1822	0	0	? dfa->edests[cur_node].elems[1] : REG_MISSING);
1823	0	0	if ((!re_node_set_contains (aTHX_ inv_eclosure, edst1)
1824	0	0	&& re_node_set_contains (aTHX_ dest_nodes, edst1))
1825	0	0	\|\| (REG_VALID_NONZERO_INDEX (edst2)
1826	0	0	&& !re_node_set_contains (aTHX_ inv_eclosure, edst2)
1827	0	0	&& re_node_set_contains (aTHX_ dest_nodes, edst2)))
1828			{
1829	0		err = re_node_set_add_intersect (aTHX_ &except_nodes, candidates,
1830	0		dfa->inveclosures + cur_node);
1831	0	0	if (BE (err != REG_NOERROR, 0))
1832			{
1833	0	0	re_node_set_free (&except_nodes);
1834	0		return err;
1835			}
1836			}
1837			}
1838			}
1839	0	0	for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1840			{
1841	0		Idx cur_node = inv_eclosure->elems[ecl_idx];
1842	0	0	if (!re_node_set_contains (aTHX_ &except_nodes, cur_node))
1843			{
1844	0		Idx idx = re_node_set_contains (aTHX_ dest_nodes, cur_node) - 1;
1845	0		re_node_set_remove_at (aTHX_ dest_nodes, idx);
1846			}
1847			}
1848	0	0	re_node_set_free (&except_nodes);
1849	0		return REG_NOERROR;
1850			}
1851
1852			static bool
1853			internal_function
1854	0		check_dst_limits (pTHX_ const re_match_context_t mctx, const re_node_set limits,
1855			Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1856			{
1857	0		const re_dfa_t *const dfa = mctx->dfa;
1858			Idx lim_idx, src_pos, dst_pos;
1859
1860	0		Idx dst_bkref_idx = search_cur_bkref_entry (aTHX_ mctx, dst_idx);
1861	0		Idx src_bkref_idx = search_cur_bkref_entry (aTHX_ mctx, src_idx);
1862	0	0	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1863			{
1864			Idx subexp_idx;
1865			struct re_backref_cache_entry *ent;
1866	0		ent = mctx->bkref_ents + limits->elems[lim_idx];
1867	0		subexp_idx = dfa->nodes[ent->node].opr.idx;
1868
1869	0		dst_pos = check_dst_limits_calc_pos (aTHX_ mctx, limits->elems[lim_idx],
1870			subexp_idx, dst_node, dst_idx,
1871			dst_bkref_idx);
1872	0		src_pos = check_dst_limits_calc_pos (aTHX_ mctx, limits->elems[lim_idx],
1873			subexp_idx, src_node, src_idx,
1874			src_bkref_idx);
1875
1876			/* In case of:
1877			( )
1878			( )
1879			( ) */
1880	0	0	if (src_pos == dst_pos)
1881	0		continue; /* This is unrelated limitation. */
1882			else
1883	0		return true;
1884			}
1885	0		return false;
1886			}
1887
1888			static int
1889			internal_function
1890	0		check_dst_limits_calc_pos_1 (pTHX_ const re_match_context_t *mctx, int boundaries,
1891			Idx subexp_idx, Idx from_node, Idx bkref_idx)
1892			{
1893	0		const re_dfa_t *const dfa = mctx->dfa;
1894	0		const re_node_set *eclosures = dfa->eclosures + from_node;
1895			Idx node_idx;
1896
1897			/* Else, we are on the boundary: examine the nodes on the epsilon
1898			closure. */
1899	0	0	for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1900			{
1901	0		Idx node = eclosures->elems[node_idx];
1902	0		switch (dfa->nodes[node].type)
1903			{
1904			case OP_BACK_REF:
1905	0	0	if (bkref_idx != REG_MISSING)
1906			{
1907	0		struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1908			do
1909			{
1910			Idx dst;
1911			int cpos;
1912
1913	0	0	if (ent->node != node)
1914	0		continue;
1915
1916	0	0	if (subexp_idx < BITSET_WORD_BITS
1917	0	0	&& !(ent->eps_reachable_subexps_map
1918			& ((bitset_word_t) 1 << subexp_idx)))
1919	0		continue;
1920
1921			/* Recurse trying to reach the OP_OPEN_SUBEXP and
1922			OP_CLOSE_SUBEXP cases below. But, if the
1923			destination node is the same node as the source
1924			node, don't recurse because it would cause an
1925			infinite loop: a regex that exhibits this behavior
1926			is ()\1\1 */
1927	0		dst = dfa->edests[node].elems[0];
1928	0	0	if (dst == from_node)
1929			{
1930	0	0	if (boundaries & 1)
1931	0		return -1;
1932			else /* if (boundaries & 2) */
1933	0		return 0;
1934			}
1935
1936	0		cpos =
1937			check_dst_limits_calc_pos_1 (aTHX_ mctx, boundaries, subexp_idx,
1938			dst, bkref_idx);
1939	0	0	if (cpos == -1 /* && (boundaries & 1) */)
1940	0		return -1;
1941	0	0	if (cpos == 0 && (boundaries & 2))
		0
1942	0		return 0;
1943
1944	0	0	if (subexp_idx < BITSET_WORD_BITS)
1945			ent->eps_reachable_subexps_map
1946	0		&= ~((bitset_word_t) 1 << subexp_idx);
1947			}
1948	0	0	while (ent++->more);
1949			}
1950	0		break;
1951
1952			case OP_OPEN_SUBEXP:
1953	0	0	if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
		0
1954	0		return -1;
1955	0		break;
1956
1957			case OP_CLOSE_SUBEXP:
1958	0	0	if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
		0
1959	0		return 0;
1960	0		break;
1961
1962			default:
1963	0		break;
1964			}
1965			}
1966
1967	0		return (boundaries & 2) ? 1 : 0;
1968			}
1969
1970			static int
1971			internal_function
1972	0		check_dst_limits_calc_pos (pTHX_ const re_match_context_t *mctx, Idx limit,
1973			Idx subexp_idx, Idx from_node, Idx str_idx,
1974			Idx bkref_idx)
1975			{
1976	0		struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1977			int boundaries;
1978
1979			/* If we are outside the range of the subexpression, return -1 or 1. */
1980	0	0	if (str_idx < lim->subexp_from)
1981	0		return -1;
1982
1983	0	0	if (lim->subexp_to < str_idx)
1984	0		return 1;
1985
1986			/* If we are within the subexpression, return 0. */
1987	0		boundaries = (str_idx == lim->subexp_from);
1988	0	0	boundaries \|= (str_idx == lim->subexp_to) << 1;
1989	0	0	if (boundaries == 0)
1990	0		return 0;
1991
1992			/* Else, examine epsilon closure. */
1993	0		return check_dst_limits_calc_pos_1 (aTHX_ mctx, boundaries, subexp_idx,
1994			from_node, bkref_idx);
1995			}
1996
1997			/* Check the limitations of sub expressions LIMITS, and remove the nodes
1998			which are against limitations from DEST_NODES. */
1999
2000			static reg_errcode_t
2001			internal_function
2002	0		check_subexp_limits (pTHX_ const re_dfa_t dfa, re_node_set dest_nodes,
2003			const re_node_set candidates, re_node_set limits,
2004			struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2005			{
2006			reg_errcode_t err;
2007			Idx node_idx, lim_idx;
2008
2009	0	0	for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
2010			{
2011			Idx subexp_idx;
2012			struct re_backref_cache_entry *ent;
2013	0		ent = bkref_ents + limits->elems[lim_idx];
2014
2015	0	0	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
		0
2016	0		continue; /* This is unrelated limitation. */
2017
2018	0		subexp_idx = dfa->nodes[ent->node].opr.idx;
2019	0	0	if (ent->subexp_to == str_idx)
2020			{
2021	0		Idx ops_node = REG_MISSING;
2022	0		Idx cls_node = REG_MISSING;
2023	0	0	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2024			{
2025	0		Idx node = dest_nodes->elems[node_idx];
2026	0		re_token_type_t type = dfa->nodes[node].type;
2027	0	0	if (type == OP_OPEN_SUBEXP
2028	0	0	&& subexp_idx == dfa->nodes[node].opr.idx)
2029	0		ops_node = node;
2030	0	0	else if (type == OP_CLOSE_SUBEXP
2031	0	0	&& subexp_idx == dfa->nodes[node].opr.idx)
2032	0		cls_node = node;
2033			}
2034
2035			/* Check the limitation of the open subexpression. */
2036			/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2037	0	0	if (REG_VALID_INDEX (ops_node))
2038			{
2039	0		err = sub_epsilon_src_nodes (aTHX_ dfa, ops_node, dest_nodes,
2040			candidates);
2041	0	0	if (BE (err != REG_NOERROR, 0))
2042	0		return err;
2043			}
2044
2045			/* Check the limitation of the close subexpression. */
2046	0	0	if (REG_VALID_INDEX (cls_node))
2047	0	0	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2048			{
2049	0		Idx node = dest_nodes->elems[node_idx];
2050	0	0	if (!re_node_set_contains (aTHX_ dfa->inveclosures + node,
2051			cls_node)
2052	0	0	&& !re_node_set_contains (aTHX_ dfa->eclosures + node,
2053			cls_node))
2054			{
2055			/* It is against this limitation.
2056			Remove it form the current sifted state. */
2057	0		err = sub_epsilon_src_nodes (aTHX_ dfa, node, dest_nodes,
2058			candidates);
2059	0	0	if (BE (err != REG_NOERROR, 0))
2060	0		return err;
2061	0		--node_idx;
2062			}
2063			}
2064			}
2065			else /* (ent->subexp_to != str_idx) */
2066			{
2067	0	0	for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2068			{
2069	0		Idx node = dest_nodes->elems[node_idx];
2070	0		re_token_type_t type = dfa->nodes[node].type;
2071	0	0	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
		0
2072			{
2073	0	0	if (subexp_idx != dfa->nodes[node].opr.idx)
2074	0		continue;
2075			/* It is against this limitation.
2076			Remove it form the current sifted state. */
2077	0		err = sub_epsilon_src_nodes (aTHX_ dfa, node, dest_nodes,
2078			candidates);
2079	0	0	if (BE (err != REG_NOERROR, 0))
2080	0		return err;
2081			}
2082			}
2083			}
2084			}
2085	0		return REG_NOERROR;
2086			}
2087
2088			static reg_errcode_t
2089			internal_function __attribute_warn_unused_result__
2090	0		sift_states_bkref (pTHX_ const re_match_context_t mctx, re_sift_context_t sctx,
2091			Idx str_idx, const re_node_set *candidates)
2092			{
2093	0		const re_dfa_t *const dfa = mctx->dfa;
2094			reg_errcode_t err;
2095			Idx node_idx, node;
2096			re_sift_context_t local_sctx;
2097	0		Idx first_idx = search_cur_bkref_entry (aTHX_ mctx, str_idx);
2098
2099	0	0	if (first_idx == REG_MISSING)
2100	0		return REG_NOERROR;
2101
2102	0		local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2103
2104	0	0	for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2105			{
2106			Idx enabled_idx;
2107			re_token_type_t type;
2108			struct re_backref_cache_entry *entry;
2109	0		node = candidates->elems[node_idx];
2110	0		type = dfa->nodes[node].type;
2111			/* Avoid infinite loop for the REs like "()\1+". */
2112	0	0	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
		0
2113	0		continue;
2114	0	0	if (type != OP_BACK_REF)
2115	0		continue;
2116
2117	0		entry = mctx->bkref_ents + first_idx;
2118	0		enabled_idx = first_idx;
2119			do
2120			{
2121			Idx subexp_len;
2122			Idx to_idx;
2123			Idx dst_node;
2124			bool ok;
2125			re_dfastate_t *cur_state;
2126
2127	0	0	if (entry->node != node)
2128	0		continue;
2129	0		subexp_len = entry->subexp_to - entry->subexp_from;
2130	0		to_idx = str_idx + subexp_len;
2131	0		dst_node = (subexp_len ? dfa->nexts[node]
2132	0	0	: dfa->edests[node].elems[0]);
2133
2134	0	0	if (to_idx > sctx->last_str_idx
2135	0	0	\|\| sctx->sifted_states[to_idx] == NULL
2136	0	0	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
		0
2137	0	0	\|\| check_dst_limits (aTHX_ mctx, &sctx->limits, node,
2138			str_idx, dst_node, to_idx))
2139	0		continue;
2140
2141	0	0	if (local_sctx.sifted_states == NULL)
2142			{
2143	0		local_sctx = *sctx;
2144	0		err = re_node_set_init_copy (aTHX_ &local_sctx.limits, &sctx->limits);
2145	0	0	if (BE (err != REG_NOERROR, 0))
2146	0		goto free_return;
2147			}
2148	0		local_sctx.last_node = node;
2149	0		local_sctx.last_str_idx = str_idx;
2150	0		ok = re_node_set_insert (aTHX_ &local_sctx.limits, enabled_idx);
2151	0	0	if (BE (! ok, 0))
2152			{
2153	0		err = REG_ESPACE;
2154	0		goto free_return;
2155			}
2156	0		cur_state = local_sctx.sifted_states[str_idx];
2157	0		err = sift_states_backward (aTHX_ mctx, &local_sctx);
2158	0	0	if (BE (err != REG_NOERROR, 0))
2159	0		goto free_return;
2160	0	0	if (sctx->limited_states != NULL)
2161			{
2162	0		err = merge_state_array (aTHX_ dfa, sctx->limited_states,
2163			local_sctx.sifted_states,
2164			str_idx + 1);
2165	0	0	if (BE (err != REG_NOERROR, 0))
2166	0		goto free_return;
2167			}
2168	0		local_sctx.sifted_states[str_idx] = cur_state;
2169	0		re_node_set_remove (aTHX_ &local_sctx.limits, enabled_idx);
2170
2171			/* mctx->bkref_ents may have changed, reload the pointer. */
2172	0		entry = mctx->bkref_ents + enabled_idx;
2173			}
2174	0	0	while (enabled_idx++, entry++->more);
2175			}
2176	0		err = REG_NOERROR;
2177			free_return:
2178	0	0	if (local_sctx.sifted_states != NULL)
2179			{
2180	0	0	re_node_set_free (&local_sctx.limits);
2181			}
2182
2183	0		return err;
2184			}
2185
2186
2187			#ifdef RE_ENABLE_I18N
2188			static int
2189			internal_function
2190	9		sift_states_iter_mb (pTHX_ const re_match_context_t mctx, re_sift_context_t sctx,
2191			Idx node_idx, Idx str_idx, Idx max_str_idx)
2192			{
2193	9		const re_dfa_t *const dfa = mctx->dfa;
2194			int naccepted;
2195			/* Check the node can accept "multi byte". */
2196	9		naccepted = check_node_accept_bytes (aTHX_ dfa, node_idx, &mctx->input, mctx->sv, str_idx);
2197	9	50	if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
		50
		50
2198	9	50	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2199			dfa->nexts[node_idx]))
2200			/* The node can't accept the "multi byte", or the
2201			destination was already thrown away, then the node
2202			could't accept the current input "multi byte". */
2203	0		naccepted = 0;
2204			/* Otherwise, it is sure that the node could accept
2205			'naccepted' bytes input. */
2206	9		return naccepted;
2207			}
2208			#endif /* RE_ENABLE_I18N */
2209
2210
2211			/* Functions for state transition. */
2212
2213			/* Return the next state to which the current state STATE will transit by
2214			accepting the current input byte, and update STATE_LOG if necessary.
2215			If STATE can accept a multibyte char/collating element/back reference
2216			update the destination of STATE_LOG. */
2217
2218			static re_dfastate_t *
2219			internal_function __attribute_warn_unused_result__
2220	61		transit_state (pTHX_ reg_errcode_t err, re_match_context_t mctx,
2221			re_dfastate_t *state)
2222			{
2223			re_dfastate_t **trtable;
2224			unsigned char ch;
2225
2226			#ifdef RE_ENABLE_I18N
2227			/* If the current state can accept multibyte. */
2228	61	100	if (BE (state->accept_mb, 0))
2229			{
2230	17		*err = transit_state_mb (aTHX_ mctx, state);
2231	17	50	if (BE (*err != REG_NOERROR, 0))
2232	0		return NULL;
2233			}
2234			#endif /* RE_ENABLE_I18N */
2235
2236			/* Then decide the next state with the single byte. */
2237			#if 0
2238			if (0)
2239			/* don't use transition table */
2240			return transit_state_sb (err, mctx, state);
2241			#endif
2242
2243			/* Use transition table */
2244	61		ch = re_string_fetch_byte (&mctx->input);
2245			for (;;)
2246			{
2247	80		trtable = state->trtable;
2248	80	100	if (BE (trtable != NULL, 1))
2249	61		return trtable[ch];
2250
2251	19		trtable = state->word_trtable;
2252	19	50	if (BE (trtable != NULL, 1))
2253			{
2254			unsigned int context;
2255			context
2256	0		= re_string_context_at (aTHX_ &mctx->input,
2257	0		re_string_cur_idx (&mctx->input) - 1,
2258			mctx->eflags);
2259	0	0	if (IS_WORD_CONTEXT (context))
2260	0		return trtable[ch + SBC_MAX];
2261			else
2262	0		return trtable[ch];
2263			}
2264
2265	19	50	if (!build_trtable (aTHX_ mctx->dfa, state))
2266			{
2267	0		*err = REG_ESPACE;
2268	0		return NULL;
2269			}
2270
2271			/* Retry, we now have a transition table. */
2272	19		}
2273			}
2274
2275			/* Update the state_log if we need */
2276			static re_dfastate_t *
2277			internal_function
2278	62		merge_state_with_log (pTHX_ reg_errcode_t err, re_match_context_t mctx,
2279			re_dfastate_t *next_state)
2280			{
2281	62		const re_dfa_t *const dfa = mctx->dfa;
2282	62		Idx cur_idx = re_string_cur_idx (&mctx->input);
2283
2284	62	100	if (cur_idx > mctx->state_log_top)
2285			{
2286	30		mctx->state_log[cur_idx] = next_state;
2287	30		mctx->state_log_top = cur_idx;
2288			}
2289	32	100	else if (mctx->state_log[cur_idx] == 0)
2290			{
2291	16		mctx->state_log[cur_idx] = next_state;
2292			}
2293			else
2294			{
2295			re_dfastate_t *pstate;
2296			unsigned int context;
2297	16		re_node_set next_nodes, log_nodes, table_nodes = NULL;
2298			/* If (state_log[cur_idx] != 0), it implies that cur_idx is
2299			the destination of a multibyte char/collating element/
2300			back reference. Then the next state is the union set of
2301			these destinations and the results of the transition table. */
2302	16		pstate = mctx->state_log[cur_idx];
2303	16		log_nodes = pstate->entrance_nodes;
2304	16	50	if (next_state != NULL)
2305			{
2306	0		table_nodes = next_state->entrance_nodes;
2307	0		*err = re_node_set_init_union (aTHX_ &next_nodes, table_nodes,
2308			log_nodes);
2309	0	0	if (BE (*err != REG_NOERROR, 0))
2310	0		return NULL;
2311			}
2312			else
2313	16		next_nodes = *log_nodes;
2314			/* Note: We already add the nodes of the initial state,
2315			then we don't need to add them here. */
2316
2317	16		context = re_string_context_at (aTHX_ &mctx->input,
2318	16		re_string_cur_idx (&mctx->input) - 1,
2319			mctx->eflags);
2320	32		next_state = mctx->state_log[cur_idx]
2321	16		= re_acquire_state_context (aTHX_ err, dfa, &next_nodes, context);
2322			/* We don't need to check errors here, since the return value of
2323			this function is next_state and ERR is already set. */
2324
2325	16	50	if (table_nodes != NULL)
2326	16	0	re_node_set_free (&next_nodes);
2327			}
2328
2329	62	50	if (BE (dfa->nbackref, 0) && next_state != NULL)
		0
2330			{
2331			/* Check OP_OPEN_SUBEXP in the current state in case that we use them
2332			later. We must check them here, since the back references in the
2333			next state might use them. */
2334	0		*err = check_subexp_matching_top (aTHX_ mctx, &next_state->nodes,
2335			cur_idx);
2336	0	0	if (BE (*err != REG_NOERROR, 0))
2337	0		return NULL;
2338
2339			/* If the next state has back references. */
2340	0	0	if (next_state->has_backref)
2341			{
2342	0		*err = transit_state_bkref (aTHX_ mctx, &next_state->nodes);
2343	0	0	if (BE (*err != REG_NOERROR, 0))
2344	0		return NULL;
2345	0		next_state = mctx->state_log[cur_idx];
2346			}
2347			}
2348
2349	62		return next_state;
2350			}
2351
2352			/* Skip bytes in the input that correspond to part of a
2353			multi-byte match, then look in the log for a state
2354			from which to restart matching. */
2355			static re_dfastate_t *
2356			internal_function
2357	33		find_recover_state (pTHX_ reg_errcode_t err, re_match_context_t mctx)
2358			{
2359			re_dfastate_t *cur_state;
2360			do
2361			{
2362	33		Idx max = mctx->state_log_top;
2363	33		Idx cur_str_idx = re_string_cur_idx (aTHX_ &mctx->input);
2364
2365			do
2366			{
2367	49	100	if (++cur_str_idx > max)
2368	17		return NULL;
2369	32		re_string_skip_bytes (aTHX_ &mctx->input, 1);
2370			}
2371	32	100	while (mctx->state_log[cur_str_idx] == NULL);
2372
2373	16		cur_state = merge_state_with_log (aTHX_ err, mctx, NULL);
2374			}
2375	16	50	while (*err == REG_NOERROR && cur_state == NULL);
		50
2376	16		return cur_state;
2377			}
2378
2379			/* Helper functions for transit_state. */
2380
2381			/* From the node set CUR_NODES, pick up the nodes whose types are
2382			OP_OPEN_SUBEXP and which have corresponding back references in the regular
2383			expression. And register them to use them later for evaluating the
2384			corresponding back references. */
2385
2386			static reg_errcode_t
2387			internal_function
2388	0		check_subexp_matching_top (pTHX_ re_match_context_t mctx, re_node_set cur_nodes,
2389			Idx str_idx)
2390			{
2391	0		const re_dfa_t *const dfa = mctx->dfa;
2392			Idx node_idx;
2393			reg_errcode_t err;
2394
2395			/* TODO: This isn't efficient.
2396			Because there might be more than one nodes whose types are
2397			OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2398			nodes.
2399			E.g. RE: (a){2} */
2400	0	0	for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2401			{
2402	0		Idx node = cur_nodes->elems[node_idx];
2403	0	0	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2404	0	0	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2405	0	0	&& (dfa->used_bkref_map
2406	0		& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2407			{
2408	0		err = match_ctx_add_subtop (aTHX_ mctx, node, str_idx);
2409	0	0	if (BE (err != REG_NOERROR, 0))
2410	0		return err;
2411			}
2412			}
2413	0		return REG_NOERROR;
2414			}
2415
2416			#if 0
2417			/* Return the next state to which the current state STATE will transit by
2418			accepting the current input byte. */
2419
2420			static re_dfastate_t *
2421			transit_state_sb (pTHX_ reg_errcode_t err, re_match_context_t mctx,
2422			re_dfastate_t *state)
2423			{
2424			const re_dfa_t *const dfa = mctx->dfa;
2425			re_node_set next_nodes;
2426			re_dfastate_t *next_state;
2427			Idx node_cnt, cur_str_idx = re_string_cur_idx (aTHX_ &mctx->input);
2428			unsigned int context;
2429
2430			*err = re_node_set_alloc (&aTHX_ next_nodes, state->nodes.nelem + 1);
2431			if (BE (*err != REG_NOERROR, 0))
2432			return NULL;
2433			for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2434			{
2435			Idx cur_node = state->nodes.elems[node_cnt];
2436			if (check_node_accept (aTHX_ mctx, dfa->nodes + cur_node, cur_str_idx))
2437			{
2438			*err = re_node_set_merge (aTHX_ &next_nodes,
2439			dfa->eclosures + dfa->nexts[cur_node]);
2440			if (BE (*err != REG_NOERROR, 0))
2441			{
2442			re_node_set_free (&next_nodes);
2443			return NULL;
2444			}
2445			}
2446			}
2447			context = re_string_context_at (aTHX_ &mctx->input, cur_str_idx, mctx->eflags);
2448			next_state = re_acquire_state_context (aTHX_ err, dfa, &next_nodes, context);
2449			/* We don't need to check errors here, since the return value of
2450			this function is next_state and ERR is already set. */
2451
2452			re_node_set_free (&next_nodes);
2453			re_string_skip_bytes (aTHX_ &mctx->input, 1);
2454			return next_state;
2455			}
2456			#endif
2457
2458			#ifdef RE_ENABLE_I18N
2459			static reg_errcode_t
2460			internal_function
2461	17		transit_state_mb (pTHX_ re_match_context_t mctx, re_dfastate_t pstate)
2462			{
2463	17		const re_dfa_t *const dfa = mctx->dfa;
2464			reg_errcode_t err;
2465			Idx i;
2466
2467	85	100	for (i = 0; i < pstate->nodes.nelem; ++i)
2468			{
2469			re_node_set dest_nodes, *new_nodes;
2470	68		Idx cur_node_idx = pstate->nodes.elems[i];
2471			int naccepted;
2472			Idx dest_idx;
2473			unsigned int context;
2474			re_dfastate_t *dest_state;
2475
2476	68	100	if (!dfa->nodes[cur_node_idx].accept_mb)
2477	52		continue;
2478
2479	17	50	if (dfa->nodes[cur_node_idx].constraint)
2480			{
2481	0		context = re_string_context_at (aTHX_ &mctx->input,
2482			re_string_cur_idx (&mctx->input),
2483			mctx->eflags);
2484	0	0	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
		0
		0
		0
		0
		0
		0
		0
2485			context))
2486	0		continue;
2487			}
2488
2489			/* How many bytes the node can accept? */
2490	17		naccepted = check_node_accept_bytes (aTHX_ dfa, cur_node_idx, &mctx->input, mctx->sv,
2491			re_string_cur_idx (&mctx->input));
2492	17	100	if (naccepted == 0)
2493	1		continue;
2494
2495			/* The node can accepts 'naccepted' bytes. */
2496	16		dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2497	16		mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2498	16		: mctx->max_mb_elem_len);
2499	16		err = clean_state_log_if_needed (aTHX_ mctx, dest_idx);
2500	16	50	if (BE (err != REG_NOERROR, 0))
2501	0		return err;
2502			#ifdef DEBUG
2503			assert (dfa->nexts[cur_node_idx] != REG_MISSING);
2504			#endif
2505	16		new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2506
2507	16		dest_state = mctx->state_log[dest_idx];
2508	16	50	if (dest_state == NULL)
2509	16		dest_nodes = *new_nodes;
2510			else
2511			{
2512	0		err = re_node_set_init_union (aTHX_ &dest_nodes,
2513	0		dest_state->entrance_nodes, new_nodes);
2514	0	0	if (BE (err != REG_NOERROR, 0))
2515	0		return err;
2516			}
2517	16		context = re_string_context_at (aTHX_ &mctx->input, dest_idx - 1,
2518			mctx->eflags);
2519	16		mctx->state_log[dest_idx]
2520	16		= re_acquire_state_context (aTHX_ &err, dfa, &dest_nodes, context);
2521	16	50	if (dest_state != NULL)
2522	0	0	re_node_set_free (&dest_nodes);
2523	16	50	if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
		0
2524	16		return err;
2525			}
2526	17		return REG_NOERROR;
2527			}
2528			#endif /* RE_ENABLE_I18N */
2529
2530			static reg_errcode_t
2531			internal_function
2532	0		transit_state_bkref (pTHX_ re_match_context_t mctx, const re_node_set nodes)
2533			{
2534	0		const re_dfa_t *const dfa = mctx->dfa;
2535			reg_errcode_t err;
2536			Idx i;
2537	0		Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2538
2539	0	0	for (i = 0; i < nodes->nelem; ++i)
2540			{
2541			Idx dest_str_idx, prev_nelem, bkc_idx;
2542	0		Idx node_idx = nodes->elems[i];
2543			unsigned int context;
2544	0		const re_token_t *node = dfa->nodes + node_idx;
2545			re_node_set *new_dest_nodes;
2546
2547			/* Check whether 'node' is a backreference or not. */
2548	0	0	if (node->type != OP_BACK_REF)
2549	0		continue;
2550
2551	0	0	if (node->constraint)
2552			{
2553	0		context = re_string_context_at (aTHX_ &mctx->input, cur_str_idx,
2554			mctx->eflags);
2555	0	0	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
		0
		0
		0
		0
		0
		0
		0
2556	0		continue;
2557			}
2558
2559			/* 'node' is a backreference.
2560			Check the substring which the substring matched. */
2561	0		bkc_idx = mctx->nbkref_ents;
2562	0		err = get_subexp (aTHX_ mctx, node_idx, cur_str_idx);
2563	0	0	if (BE (err != REG_NOERROR, 0))
2564	0		goto free_return;
2565
2566			/* And add the epsilon closures (which is 'new_dest_nodes') of
2567			the backreference to appropriate state_log. */
2568			#ifdef DEBUG
2569			assert (dfa->nexts[node_idx] != REG_MISSING);
2570			#endif
2571	0	0	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2572			{
2573			Idx subexp_len;
2574			re_dfastate_t *dest_state;
2575			struct re_backref_cache_entry *bkref_ent;
2576	0		bkref_ent = mctx->bkref_ents + bkc_idx;
2577	0	0	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
		0
2578	0		continue;
2579	0		subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2580	0		new_dest_nodes = (subexp_len == 0
2581	0		? dfa->eclosures + dfa->edests[node_idx].elems[0]
2582	0	0	: dfa->eclosures + dfa->nexts[node_idx]);
2583	0		dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2584	0		- bkref_ent->subexp_from);
2585	0		context = re_string_context_at (aTHX_ &mctx->input, dest_str_idx - 1,
2586			mctx->eflags);
2587	0		dest_state = mctx->state_log[dest_str_idx];
2588	0		prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2589	0	0	: mctx->state_log[cur_str_idx]->nodes.nelem);
2590			/* Add 'new_dest_node' to state_log. */
2591	0	0	if (dest_state == NULL)
2592			{
2593	0		mctx->state_log[dest_str_idx]
2594	0		= re_acquire_state_context (aTHX_ &err, dfa, new_dest_nodes,
2595			context);
2596	0	0	if (BE (mctx->state_log[dest_str_idx] == NULL
		0
2597			&& err != REG_NOERROR, 0))
2598	0		goto free_return;
2599			}
2600			else
2601			{
2602			re_node_set dest_nodes;
2603	0		err = re_node_set_init_union (aTHX_ &dest_nodes,
2604	0		dest_state->entrance_nodes,
2605			new_dest_nodes);
2606	0	0	if (BE (err != REG_NOERROR, 0))
2607			{
2608	0	0	re_node_set_free (&dest_nodes);
2609	0		goto free_return;
2610			}
2611	0		mctx->state_log[dest_str_idx]
2612	0		= re_acquire_state_context (aTHX_ &err, dfa, &dest_nodes, context);
2613	0	0	re_node_set_free (&dest_nodes);
2614	0	0	if (BE (mctx->state_log[dest_str_idx] == NULL
		0
2615			&& err != REG_NOERROR, 0))
2616	0		goto free_return;
2617			}
2618			/* We need to check recursively if the backreference can epsilon
2619			transit. */
2620	0	0	if (subexp_len == 0
2621	0	0	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2622			{
2623	0		err = check_subexp_matching_top (aTHX_ mctx, new_dest_nodes,
2624			cur_str_idx);
2625	0	0	if (BE (err != REG_NOERROR, 0))
2626	0		goto free_return;
2627	0		err = transit_state_bkref (aTHX_ mctx, new_dest_nodes);
2628	0	0	if (BE (err != REG_NOERROR, 0))
2629	0		goto free_return;
2630			}
2631			}
2632			}
2633	0		err = REG_NOERROR;
2634			free_return:
2635	0		return err;
2636			}
2637
2638			/* Enumerate all the candidates which the backreference BKREF_NODE can match
2639			at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2640			Note that we might collect inappropriate candidates here.
2641			However, the cost of checking them strictly here is too high, then we
2642			delay these checking for prune_impossible_nodes(). */
2643
2644			static reg_errcode_t
2645			internal_function __attribute_warn_unused_result__
2646	0		get_subexp (pTHX_ re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2647			{
2648	0		const re_dfa_t *const dfa = mctx->dfa;
2649			Idx subexp_num, sub_top_idx;
2650	0		const char buf = (const char ) re_string_get_buffer (&mctx->input);
2651			/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2652	0		Idx cache_idx = search_cur_bkref_entry (aTHX_ mctx, bkref_str_idx);
2653	0	0	if (cache_idx != REG_MISSING)
2654			{
2655	0		const struct re_backref_cache_entry *entry
2656	0		= mctx->bkref_ents + cache_idx;
2657			do
2658	0	0	if (entry->node == bkref_node)
2659	0		return REG_NOERROR; /* We already checked it. */
2660	0	0	while (entry++->more);
2661			}
2662
2663	0		subexp_num = dfa->nodes[bkref_node].opr.idx;
2664
2665			/* For each sub expression */
2666	0	0	for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2667			{
2668			reg_errcode_t err;
2669	0		re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2670			re_sub_match_last_t *sub_last;
2671			Idx sub_last_idx, sl_str, bkref_str_off;
2672
2673	0	0	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2674	0		continue; /* It isn't related. */
2675
2676	0		sl_str = sub_top->str_idx;
2677	0		bkref_str_off = bkref_str_idx;
2678			/* At first, check the last node of sub expressions we already
2679			evaluated. */
2680	0	0	for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2681			{
2682			regoff_t sl_str_diff;
2683	0		sub_last = sub_top->lasts[sub_last_idx];
2684	0		sl_str_diff = sub_last->str_idx - sl_str;
2685			/* The matched string by the sub expression match with the substring
2686			at the back reference? */
2687	0	0	if (sl_str_diff > 0)
2688			{
2689	0	0	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
2690			{
2691			/* Not enough chars for a successful match. */
2692	0	0	if (bkref_str_off + sl_str_diff > mctx->input.len)
2693	0		break;
2694
2695	0		err = clean_state_log_if_needed (aTHX_ mctx,
2696			bkref_str_off
2697			+ sl_str_diff);
2698	0	0	if (BE (err != REG_NOERROR, 0))
2699	0		return err;
2700	0		buf = (const char *) re_string_get_buffer (&mctx->input);
2701			}
2702	0	0	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2703			/* We don't need to search this sub expression any more. */
2704	0		break;
2705			}
2706	0		bkref_str_off += sl_str_diff;
2707	0		sl_str += sl_str_diff;
2708	0		err = get_subexp_sub (aTHX_ mctx, sub_top, sub_last, bkref_node,
2709			bkref_str_idx);
2710
2711			/* Reload buf, since the preceding call might have reallocated
2712			the buffer. */
2713	0		buf = (const char *) re_string_get_buffer (&mctx->input);
2714
2715	0	0	if (err == REG_NOMATCH)
2716	0		continue;
2717	0	0	if (BE (err != REG_NOERROR, 0))
2718	0		return err;
2719			}
2720
2721	0	0	if (sub_last_idx < sub_top->nlasts)
2722	0		continue;
2723	0	0	if (sub_last_idx > 0)
2724	0		++sl_str;
2725			/* Then, search for the other last nodes of the sub expression. */
2726	0	0	for (; sl_str <= bkref_str_idx; ++sl_str)
2727			{
2728			Idx cls_node;
2729			regoff_t sl_str_off;
2730			const re_node_set *nodes;
2731	0		sl_str_off = sl_str - sub_top->str_idx;
2732			/* The matched string by the sub expression match with the substring
2733			at the back reference? */
2734	0	0	if (sl_str_off > 0)
2735			{
2736	0	0	if (BE (bkref_str_off >= mctx->input.valid_len, 0))
2737			{
2738			/* If we are at the end of the input, we cannot match. */
2739	0	0	if (bkref_str_off >= mctx->input.len)
2740	0		break;
2741
2742	0		err = extend_buffers (aTHX_ mctx, bkref_str_off + 1);
2743	0	0	if (BE (err != REG_NOERROR, 0))
2744	0		return err;
2745
2746	0		buf = (const char *) re_string_get_buffer (aTHX_ &mctx->input);
2747			}
2748	0	0	if (buf [bkref_str_off++] != buf[sl_str - 1])
2749	0		break; /* We don't need to search this sub expression
2750			any more. */
2751			}
2752	0	0	if (mctx->state_log[sl_str] == NULL)
2753	0		continue;
2754			/* Does this state have a ')' of the sub expression? */
2755	0		nodes = &mctx->state_log[sl_str]->nodes;
2756	0		cls_node = find_subexp_node (aTHX_ dfa, nodes, subexp_num,
2757			OP_CLOSE_SUBEXP);
2758	0	0	if (cls_node == REG_MISSING)
2759	0		continue; /* No. */
2760	0	0	if (sub_top->path == NULL)
2761			{
2762	0	0	re_calloc(sub_top->path, state_array_t, sl_str - sub_top->str_idx + 1);
2763			}
2764			/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2765			in the current context? */
2766	0		err = check_arrival (aTHX_ mctx, sub_top->path, sub_top->node,
2767			sub_top->str_idx, cls_node, sl_str,
2768			OP_CLOSE_SUBEXP);
2769	0	0	if (err == REG_NOMATCH)
2770	0		continue;
2771	0	0	if (BE (err != REG_NOERROR, 0))
2772	0		return err;
2773	0		sub_last = match_ctx_add_sublast (aTHX_ sub_top, cls_node, sl_str);
2774	0	0	if (BE (sub_last == NULL, 0))
2775	0		return REG_ESPACE;
2776	0		err = get_subexp_sub (aTHX_ mctx, sub_top, sub_last, bkref_node,
2777			bkref_str_idx);
2778	0	0	if (err == REG_NOMATCH)
2779	0		continue;
2780			}
2781			}
2782	0		return REG_NOERROR;
2783			}
2784
2785			/* Helper functions for get_subexp(). */
2786
2787			/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2788			If it can arrive, register the sub expression expressed with SUB_TOP
2789			and SUB_LAST. */
2790
2791			static reg_errcode_t
2792			internal_function
2793	0		get_subexp_sub (pTHX_ re_match_context_t mctx, const re_sub_match_top_t sub_top,
2794			re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2795			{
2796			reg_errcode_t err;
2797			Idx to_idx;
2798			/* Can the subexpression arrive the back reference? */
2799	0		err = check_arrival (aTHX_ mctx, &sub_last->path, sub_last->node,
2800			sub_last->str_idx, bkref_node, bkref_str,
2801			OP_OPEN_SUBEXP);
2802	0	0	if (err != REG_NOERROR)
2803	0		return err;
2804	0		err = match_ctx_add_entry (aTHX_ mctx, bkref_node, bkref_str, sub_top->str_idx,
2805			sub_last->str_idx);
2806	0	0	if (BE (err != REG_NOERROR, 0))
2807	0		return err;
2808	0		to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2809	0		return clean_state_log_if_needed (aTHX_ mctx, to_idx);
2810			}
2811
2812			/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2813			Search '(' if FL_OPEN, or search ')' otherwise.
2814			TODO: This function isn't efficient...
2815			Because there might be more than one nodes whose types are
2816			OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2817			nodes.
2818			E.g. RE: (a){2} */
2819
2820			static Idx
2821			internal_function
2822	0		find_subexp_node (pTHX_ const re_dfa_t dfa, const re_node_set nodes,
2823			Idx subexp_idx, int type)
2824			{
2825			Idx cls_idx;
2826	0	0	for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2827			{
2828	0		Idx cls_node = nodes->elems[cls_idx];
2829	0		const re_token_t *node = dfa->nodes + cls_node;
2830	0	0	if (node->type == type
2831	0	0	&& node->opr.idx == subexp_idx)
2832	0		return cls_node;
2833			}
2834	0		return REG_MISSING;
2835			}
2836
2837			/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2838			LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2839			heavily reused.
2840			Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */
2841
2842			static reg_errcode_t
2843			internal_function __attribute_warn_unused_result__
2844	0		check_arrival (pTHX_ re_match_context_t mctx, state_array_t path, Idx top_node,
2845			Idx top_str, Idx last_node, Idx last_str, int type)
2846			{
2847	0		const re_dfa_t *const dfa = mctx->dfa;
2848	0		reg_errcode_t err = REG_NOERROR;
2849			Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2850	0		re_dfastate_t *cur_state = NULL;
2851			re_node_set *cur_nodes, next_nodes;
2852			re_dfastate_t **backup_state_log;
2853			unsigned int context;
2854
2855	0		subexp_num = dfa->nodes[top_node].opr.idx;
2856			/* Extend the buffer if we need. */
2857	0	0	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
2858			{
2859	0		Idx old_alloc = path->alloc;
2860	0		Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2861			Idx new_alloc;
2862	0	0	if (BE (IDX_MAX - old_alloc < incr_alloc, 0))
2863	0		return REG_ESPACE;
2864	0		new_alloc = old_alloc + incr_alloc;
2865	0	0	if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
2866	0		return REG_ESPACE;
2867	0	0	re_realloc (path->array, re_dfastate_t *, new_alloc);
2868	0		path->alloc = new_alloc;
2869	0		memset (path->array + old_alloc, '\0',
2870	0		sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2871			}
2872
2873	0	0	str_idx = path->next_idx ? path->next_idx : top_str;
2874
2875			/* Temporary modify MCTX. */
2876	0		backup_state_log = mctx->state_log;
2877	0		backup_cur_idx = mctx->input.cur_idx;
2878	0		mctx->state_log = path->array;
2879	0		mctx->input.cur_idx = str_idx;
2880
2881			/* Setup initial node set. */
2882	0		context = re_string_context_at (aTHX_ &mctx->input, str_idx - 1, mctx->eflags);
2883	0	0	if (str_idx == top_str)
2884			{
2885	0		err = re_node_set_init_1 (aTHX_ &next_nodes, top_node);
2886	0	0	if (BE (err != REG_NOERROR, 0))
2887	0		return err;
2888	0		err = check_arrival_expand_ecl (aTHX_ dfa, &next_nodes, subexp_num, type);
2889	0	0	if (BE (err != REG_NOERROR, 0))
2890			{
2891	0	0	re_node_set_free (&next_nodes);
2892	0		return err;
2893			}
2894			}
2895			else
2896			{
2897	0		cur_state = mctx->state_log[str_idx];
2898	0	0	if (cur_state && cur_state->has_backref)
		0
2899			{
2900	0		err = re_node_set_init_copy (aTHX_ &next_nodes, &cur_state->nodes);
2901	0	0	if (BE (err != REG_NOERROR, 0))
2902	0		return err;
2903			}
2904			else
2905	0		re_node_set_init_empty (&next_nodes);
2906			}
2907	0	0	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
		0
		0
2908			{
2909	0	0	if (next_nodes.nelem)
2910			{
2911	0		err = expand_bkref_cache (aTHX_ mctx, &next_nodes, str_idx,
2912			subexp_num, type);
2913	0	0	if (BE (err != REG_NOERROR, 0))
2914			{
2915	0	0	re_node_set_free (&next_nodes);
2916	0		return err;
2917			}
2918			}
2919	0		cur_state = re_acquire_state_context (aTHX_ &err, dfa, &next_nodes, context);
2920	0	0	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
		0
2921			{
2922	0	0	re_node_set_free (&next_nodes);
2923	0		return err;
2924			}
2925	0		mctx->state_log[str_idx] = cur_state;
2926			}
2927
2928	0	0	for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
		0
2929			{
2930	0		re_node_set_empty (aTHX_ &next_nodes);
2931	0	0	if (mctx->state_log[str_idx + 1])
2932			{
2933	0		err = re_node_set_merge (aTHX_ &next_nodes,
2934	0		&mctx->state_log[str_idx + 1]->nodes);
2935	0	0	if (BE (err != REG_NOERROR, 0))
2936			{
2937	0	0	re_node_set_free (&next_nodes);
2938	0		return err;
2939			}
2940			}
2941	0	0	if (cur_state)
2942			{
2943	0		err = check_arrival_add_next_nodes (aTHX_ mctx, str_idx,
2944			&cur_state->non_eps_nodes,
2945			&next_nodes);
2946	0	0	if (BE (err != REG_NOERROR, 0))
2947			{
2948	0	0	re_node_set_free (&next_nodes);
2949	0		return err;
2950			}
2951			}
2952	0		++str_idx;
2953	0	0	if (next_nodes.nelem)
2954			{
2955	0		err = check_arrival_expand_ecl (aTHX_ dfa, &next_nodes, subexp_num, type);
2956	0	0	if (BE (err != REG_NOERROR, 0))
2957			{
2958	0	0	re_node_set_free (&next_nodes);
2959	0		return err;
2960			}
2961	0		err = expand_bkref_cache (aTHX_ mctx, &next_nodes, str_idx,
2962			subexp_num, type);
2963	0	0	if (BE (err != REG_NOERROR, 0))
2964			{
2965	0	0	re_node_set_free (&next_nodes);
2966	0		return err;
2967			}
2968			}
2969	0		context = re_string_context_at (aTHX_ &mctx->input, str_idx - 1, mctx->eflags);
2970	0		cur_state = re_acquire_state_context (aTHX_ &err, dfa, &next_nodes, context);
2971	0	0	if (BE (cur_state == NULL && err != REG_NOERROR, 0))
		0
2972			{
2973	0	0	re_node_set_free (&next_nodes);
2974	0		return err;
2975			}
2976	0		mctx->state_log[str_idx] = cur_state;
2977	0	0	null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2978			}
2979	0	0	re_node_set_free (&next_nodes);
2980	0		cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2981	0	0	: &mctx->state_log[last_str]->nodes);
2982	0		path->next_idx = str_idx;
2983
2984			/* Fix MCTX. */
2985	0		mctx->state_log = backup_state_log;
2986	0		mctx->input.cur_idx = backup_cur_idx;
2987
2988			/* Then check the current node set has the node LAST_NODE. */
2989	0	0	if (cur_nodes != NULL && re_node_set_contains (aTHX_ cur_nodes, last_node))
		0
2990	0		return REG_NOERROR;
2991
2992	0		return REG_NOMATCH;
2993			}
2994
2995			/* Helper functions for check_arrival. */
2996
2997			/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2998			to NEXT_NODES.
2999			TODO: This function is similar to the functions transit_state*(),
3000			however this function has many additional works.
3001			Can't we unify them? */
3002
3003			static reg_errcode_t
3004			internal_function __attribute_warn_unused_result__
3005	0		check_arrival_add_next_nodes (pTHX_ re_match_context_t *mctx, Idx str_idx,
3006			re_node_set cur_nodes, re_node_set next_nodes)
3007			{
3008	0		const re_dfa_t *const dfa = mctx->dfa;
3009			bool ok;
3010			Idx cur_idx;
3011			#ifdef RE_ENABLE_I18N
3012	0		reg_errcode_t err = REG_NOERROR;
3013			#endif
3014			re_node_set union_set;
3015	0		re_node_set_init_empty (&union_set);
3016	0	0	for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
3017			{
3018	0		int naccepted = 0;
3019	0		Idx cur_node = cur_nodes->elems[cur_idx];
3020			#ifdef DEBUG
3021			re_token_type_t type = dfa->nodes[cur_node].type;
3022			assert (!IS_EPSILON_NODE (type));
3023			#endif
3024			#ifdef RE_ENABLE_I18N
3025			/* If the node may accept "multi byte". */
3026	0	0	if (dfa->nodes[cur_node].accept_mb)
3027			{
3028	0		naccepted = check_node_accept_bytes (aTHX_ dfa, cur_node, &mctx->input, mctx->sv,
3029			str_idx);
3030	0	0	if (naccepted > 1)
3031			{
3032			re_dfastate_t *dest_state;
3033	0		Idx next_node = dfa->nexts[cur_node];
3034	0		Idx next_idx = str_idx + naccepted;
3035	0		dest_state = mctx->state_log[next_idx];
3036	0		re_node_set_empty (aTHX_ &union_set);
3037	0	0	if (dest_state)
3038			{
3039	0		err = re_node_set_merge (aTHX_ &union_set, &dest_state->nodes);
3040	0	0	if (BE (err != REG_NOERROR, 0))
3041			{
3042	0	0	re_node_set_free (&union_set);
3043	0		return err;
3044			}
3045			}
3046	0		ok = re_node_set_insert (aTHX_ &union_set, next_node);
3047	0	0	if (BE (! ok, 0))
3048			{
3049	0	0	re_node_set_free (&union_set);
3050	0		return REG_ESPACE;
3051			}
3052	0		mctx->state_log[next_idx] = re_acquire_state (aTHX_ &err, dfa,
3053			&union_set);
3054	0	0	if (BE (mctx->state_log[next_idx] == NULL
		0
3055			&& err != REG_NOERROR, 0))
3056			{
3057	0	0	re_node_set_free (&union_set);
3058	0		return err;
3059			}
3060			}
3061			}
3062			#endif /* RE_ENABLE_I18N */
3063	0	0	if (naccepted
3064	0	0	\|\| check_node_accept (aTHX_ mctx, dfa->nodes + cur_node, str_idx))
3065			{
3066	0		ok = re_node_set_insert (aTHX_ next_nodes, dfa->nexts[cur_node]);
3067	0	0	if (BE (! ok, 0))
3068			{
3069	0	0	re_node_set_free (&union_set);
3070	0		return REG_ESPACE;
3071			}
3072			}
3073			}
3074	0	0	re_node_set_free (&union_set);
3075	0		return REG_NOERROR;
3076			}
3077
3078			/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3079			CUR_NODES, however exclude the nodes which are:
3080			- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3081			- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3082			*/
3083
3084			static reg_errcode_t
3085			internal_function
3086	0		check_arrival_expand_ecl (pTHX_ const re_dfa_t dfa, re_node_set cur_nodes,
3087			Idx ex_subexp, int type)
3088			{
3089			reg_errcode_t err;
3090			Idx idx, outside_node;
3091			re_node_set new_nodes;
3092			#ifdef DEBUG
3093			assert (cur_nodes->nelem);
3094			#endif
3095	0		err = re_node_set_alloc (aTHX_ &new_nodes, cur_nodes->nelem);
3096	0	0	if (BE (err != REG_NOERROR, 0))
3097	0		return err;
3098			/* Create a new node set NEW_NODES with the nodes which are epsilon
3099			closures of the node in CUR_NODES. */
3100
3101	0	0	for (idx = 0; idx < cur_nodes->nelem; ++idx)
3102			{
3103	0		Idx cur_node = cur_nodes->elems[idx];
3104	0		const re_node_set *eclosure = dfa->eclosures + cur_node;
3105	0		outside_node = find_subexp_node (aTHX_ dfa, eclosure, ex_subexp, type);
3106	0	0	if (outside_node == REG_MISSING)
3107			{
3108			/* There are no problematic nodes, just merge them. */
3109	0		err = re_node_set_merge (aTHX_ &new_nodes, eclosure);
3110	0	0	if (BE (err != REG_NOERROR, 0))
3111			{
3112	0	0	re_node_set_free (&new_nodes);
3113	0		return err;
3114			}
3115			}
3116			else
3117			{
3118			/* There are problematic nodes, re-calculate incrementally. */
3119	0		err = check_arrival_expand_ecl_sub (aTHX_ dfa, &new_nodes, cur_node,
3120			ex_subexp, type);
3121	0	0	if (BE (err != REG_NOERROR, 0))
3122			{
3123	0	0	re_node_set_free (&new_nodes);
3124	0		return err;
3125			}
3126			}
3127			}
3128	0	0	re_node_set_free (cur_nodes);
3129	0		*cur_nodes = new_nodes;
3130	0		return REG_NOERROR;
3131			}
3132
3133			/* Helper function for check_arrival_expand_ecl.
3134			Check incrementally the epsilon closure of TARGET, and if it isn't
3135			problematic append it to DST_NODES. */
3136
3137			static reg_errcode_t
3138			internal_function __attribute_warn_unused_result__
3139	0		check_arrival_expand_ecl_sub (pTHX_ const re_dfa_t dfa, re_node_set dst_nodes,
3140			Idx target, Idx ex_subexp, int type)
3141			{
3142			Idx cur_node;
3143	0	0	for (cur_node = target; !re_node_set_contains (aTHX_ dst_nodes, cur_node);)
3144			{
3145			bool ok;
3146
3147	0	0	if (dfa->nodes[cur_node].type == type
3148	0	0	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3149			{
3150	0	0	if (type == OP_CLOSE_SUBEXP)
3151			{
3152	0		ok = re_node_set_insert (aTHX_ dst_nodes, cur_node);
3153	0	0	if (BE (! ok, 0))
3154	0		return REG_ESPACE;
3155			}
3156	0		break;
3157			}
3158	0		ok = re_node_set_insert (aTHX_ dst_nodes, cur_node);
3159	0	0	if (BE (! ok, 0))
3160	0		return REG_ESPACE;
3161	0	0	if (dfa->edests[cur_node].nelem == 0)
3162	0		break;
3163	0	0	if (dfa->edests[cur_node].nelem == 2)
3164			{
3165			reg_errcode_t err;
3166	0		err = check_arrival_expand_ecl_sub (aTHX_ dfa, dst_nodes,
3167	0		dfa->edests[cur_node].elems[1],
3168			ex_subexp, type);
3169	0	0	if (BE (err != REG_NOERROR, 0))
3170	0		return err;
3171			}
3172	0		cur_node = dfa->edests[cur_node].elems[0];
3173			}
3174	0		return REG_NOERROR;
3175			}
3176
3177
3178			/* For all the back references in the current state, calculate the
3179			destination of the back references by the appropriate entry
3180			in MCTX->BKREF_ENTS. */
3181
3182			static reg_errcode_t
3183			internal_function __attribute_warn_unused_result__
3184	0		expand_bkref_cache (pTHX_ re_match_context_t mctx, re_node_set cur_nodes,
3185			Idx cur_str, Idx subexp_num, int type)
3186			{
3187	0		const re_dfa_t *const dfa = mctx->dfa;
3188			reg_errcode_t err;
3189	0		Idx cache_idx_start = search_cur_bkref_entry (aTHX_ mctx, cur_str);
3190			struct re_backref_cache_entry *ent;
3191
3192	0	0	if (cache_idx_start == REG_MISSING)
3193	0		return REG_NOERROR;
3194
3195			restart:
3196	0		ent = mctx->bkref_ents + cache_idx_start;
3197			do
3198			{
3199			Idx to_idx, next_node;
3200
3201			/* Is this entry ENT is appropriate? */
3202	0	0	if (!re_node_set_contains (aTHX_ cur_nodes, ent->node))
3203	0		continue; /* No. */
3204
3205	0		to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3206			/* Calculate the destination of the back reference, and append it
3207			to MCTX->STATE_LOG. */
3208	0	0	if (to_idx == cur_str)
3209			{
3210			/* The backreference did epsilon transit, we must re-check all the
3211			node in the current state. */
3212			re_node_set new_dests;
3213			reg_errcode_t err2, err3;
3214	0		next_node = dfa->edests[ent->node].elems[0];
3215	0	0	if (re_node_set_contains (aTHX_ cur_nodes, next_node))
3216	0		continue;
3217	0		err = re_node_set_init_1 (aTHX_ &new_dests, next_node);
3218	0		err2 = check_arrival_expand_ecl (aTHX_ dfa, &new_dests, subexp_num, type);
3219	0		err3 = re_node_set_merge (aTHX_ cur_nodes, &new_dests);
3220	0	0	re_node_set_free (&new_dests);
3221	0	0	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
		0
		0
		0
3222			\|\| err3 != REG_NOERROR, 0))
3223			{
3224	0	0	err = (err != REG_NOERROR ? err
		0
3225			: (err2 != REG_NOERROR ? err2 : err3));
3226	0		return err;
3227			}
3228			/* TODO: It is still inefficient... */
3229	0		goto restart;
3230			}
3231			else
3232			{
3233			re_node_set union_set;
3234	0		next_node = dfa->nexts[ent->node];
3235	0	0	if (mctx->state_log[to_idx])
3236			{
3237			bool ok;
3238	0	0	if (re_node_set_contains (aTHX_ &mctx->state_log[to_idx]->nodes,
3239			next_node))
3240	0		continue;
3241	0		err = re_node_set_init_copy (aTHX_ &union_set,
3242	0		&mctx->state_log[to_idx]->nodes);
3243	0		ok = re_node_set_insert (aTHX_ &union_set, next_node);
3244	0	0	if (BE (err != REG_NOERROR \|\| ! ok, 0))
		0
3245			{
3246	0	0	re_node_set_free (&union_set);
3247	0	0	err = err != REG_NOERROR ? err : REG_ESPACE;
3248	0		return err;
3249			}
3250			}
3251			else
3252			{
3253	0		err = re_node_set_init_1 (aTHX_ &union_set, next_node);
3254	0	0	if (BE (err != REG_NOERROR, 0))
3255	0		return err;
3256			}
3257	0		mctx->state_log[to_idx] = re_acquire_state (aTHX_ &err, dfa, &union_set);
3258	0	0	re_node_set_free (&union_set);
3259	0	0	if (BE (mctx->state_log[to_idx] == NULL
		0
3260			&& err != REG_NOERROR, 0))
3261	0		return err;
3262			}
3263			}
3264	0	0	while (ent++->more);
3265	0		return REG_NOERROR;
3266			}
3267
3268			/* Build transition table for the state.
3269			Return true if successful. */
3270
3271			static bool
3272			internal_function
3273	19		build_trtable (pTHX_ const re_dfa_t dfa, re_dfastate_t state)
3274			{
3275			reg_errcode_t err;
3276			Idx i, j;
3277			int ch;
3278	19		bool need_word_trtable = false;
3279			bitset_word_t elem, mask;
3280	19		bool dests_node_malloced = false;
3281	19		bool dest_states_malloced = false;
3282			Idx ndests; /* Number of the destination states from 'state'. */
3283			re_dfastate_t **trtable;
3284	19		re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3285			re_node_set follows, *dests_node;
3286			bitset_t *dests_ch;
3287			bitset_t acceptable;
3288
3289			struct dests_alloc
3290			{
3291			re_node_set dests_node[SBC_MAX];
3292			bitset_t dests_ch[SBC_MAX];
3293			} *dests_alloc;
3294
3295			/* We build DFA states which corresponds to the destination nodes
3296			from 'state'. 'dests_node[i]' represents the nodes which i-th
3297			destination state contains, and 'dests_ch[i]' represents the
3298			characters which i-th destination state accepts. */
3299			if (__libc_use_alloca (sizeof (struct dests_alloc)))
3300			dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
3301			else
3302			{
3303	19		re_malloc (dests_alloc, struct dests_alloc, 1);
3304	19		dests_node_malloced = true;
3305			}
3306	19		dests_node = dests_alloc->dests_node;
3307	19		dests_ch = dests_alloc->dests_ch;
3308
3309			/* Initialize transition table. */
3310	19		state->word_trtable = state->trtable = NULL;
3311
3312			/* At first, group all nodes belonging to 'state' into several
3313			destinations. */
3314	19		ndests = group_nodes_into_DFAstates (aTHX_ dfa, state, dests_node, dests_ch);
3315	19	100	if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
3316			{
3317	2	50	if (dests_node_malloced)
3318	2	50	re_free (dests_alloc);
3319			/* Return false in case of an error, true otherwise. */
3320	2	50	if (ndests == 0)
3321			{
3322	2		re_calloc(state->trtable, re_dfastate_t *, SBC_MAX);
3323	2		return true;
3324			}
3325	0		return false;
3326			}
3327
3328	17		err = re_node_set_alloc (aTHX_ &follows, ndests + 1);
3329	17	50	if (BE (err != REG_NOERROR, 0))
3330	0		goto out_free;
3331
3332			/* Avoid arithmetic overflow in size calculation. */
3333	17	50	if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3334			/ (3 * sizeof (re_dfastate_t *)))
3335			< ndests),
3336			0))
3337	0		goto out_free;
3338
3339			if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3340			+ ndests * 3 * sizeof (re_dfastate_t *)))
3341			dest_states = (re_dfastate_t **)
3342			alloca (ndests * 3 * sizeof (re_dfastate_t *));
3343			else
3344			{
3345			/*
3346			dest_states = (re_dfastate_t **)
3347			malloc (ndests * 3 * sizeof (re_dfastate_t *));
3348			*/
3349	17	50	Newx(dest_states, ndests * 3 * sizeof (re_dfastate_t ), re_dfastate_t );
3350	17	50	if (BE (dest_states == NULL, 0))
3351			{
3352			out_free:
3353	0	0	if (dest_states_malloced)
3354	0	0	re_free (dest_states);
3355	0	0	re_node_set_free (&follows);
3356	0	0	for (i = 0; i < ndests; ++i)
3357	0	0	re_node_set_free (dests_node + i);
3358	0	0	if (dests_node_malloced)
3359	0	0	re_free (dests_alloc);
3360	0		return false;
3361			}
3362	17		dest_states_malloced = true;
3363			}
3364	17		dest_states_word = dest_states + ndests;
3365	17		dest_states_nl = dest_states_word + ndests;
3366	17		bitset_empty (aTHX_ acceptable);
3367
3368			/* Then build the states for all destinations. */
3369	34	100	for (i = 0; i < ndests; ++i)
3370			{
3371			Idx next_node;
3372	17		re_node_set_empty (aTHX_ &follows);
3373			/* Merge the follows of this destination states. */
3374	34	100	for (j = 0; j < dests_node[i].nelem; ++j)
3375			{
3376	17		next_node = dfa->nexts[dests_node[i].elems[j]];
3377	17	50	if (next_node != REG_MISSING)
3378			{
3379	17		err = re_node_set_merge (aTHX_ &follows, dfa->eclosures + next_node);
3380	17	50	if (BE (err != REG_NOERROR, 0))
3381	0		goto out_free;
3382			}
3383			}
3384	17		dest_states[i] = re_acquire_state_context (aTHX_ &err, dfa, &follows, 0);
3385	17	50	if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
		0
3386	0		goto out_free;
3387			/* If the new state has context constraint,
3388			build appropriate states for these contexts. */
3389	17	50	if (dest_states[i]->has_constraint)
3390			{
3391	0		dest_states_word[i] = re_acquire_state_context (aTHX_ &err, dfa, &follows,
3392			CONTEXT_WORD);
3393	0	0	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
		0
3394	0		goto out_free;
3395
3396	0	0	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
		0
3397	0		need_word_trtable = true;
3398
3399	0		dest_states_nl[i] = re_acquire_state_context (aTHX_ &err, dfa, &follows,
3400			CONTEXT_NEWLINE);
3401	0	0	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
		0
3402	0		goto out_free;
3403			}
3404			else
3405			{
3406	17		dest_states_word[i] = dest_states[i];
3407	17		dest_states_nl[i] = dest_states[i];
3408			}
3409	17		bitset_merge (aTHX_ acceptable, dests_ch[i]);
3410			}
3411
3412	17	50	if (!BE (need_word_trtable, 0))
3413			{
3414			/* We don't care about whether the following character is a word
3415			character, or we are in a single-byte character set so we can
3416			discern by looking at the character code: allocate a
3417			256-entry transition table. */
3418	17		re_calloc(state->trtable, re_dfastate_t *, SBC_MAX);
3419	17		trtable = state->trtable;
3420
3421			/* For all characters ch...: */
3422	85	100	for (i = 0; i < BITSET_WORDS; ++i)
3423	994	100	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3424			elem;
3425	926		mask <<= 1, elem >>= 1, ++ch)
3426	926	100	if (BE (elem & 1, 0))
3427			{
3428			/* There must be exactly one destination which accepts
3429			character ch. See group_nodes_into_DFAstates. */
3430	269	50	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3431			;
3432
3433			/* j-th destination accepts the word character ch. */
3434	269	50	if (dfa->word_char[i] & mask)
3435	0		trtable[ch] = dest_states_word[j];
3436			else
3437	269		trtable[ch] = dest_states[j];
3438			}
3439			}
3440			else
3441			{
3442			/* We care about whether the following character is a word
3443			character, and we are in a multi-byte character set: discern
3444			by looking at the character code: build two 256-entry
3445			transition tables, one starting at trtable[0] and one
3446			starting at trtable[SBC_MAX]. */
3447	0		re_calloc(state->word_trtable, re_dfastate_t , 2 SBC_MAX);
3448	0		trtable = state->word_trtable;
3449
3450			/* For all characters ch...: */
3451	0	0	for (i = 0; i < BITSET_WORDS; ++i)
3452	0	0	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3453			elem;
3454	0		mask <<= 1, elem >>= 1, ++ch)
3455	0	0	if (BE (elem & 1, 0))
3456			{
3457			/* There must be exactly one destination which accepts
3458			character ch. See group_nodes_into_DFAstates. */
3459	0	0	for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3460			;
3461
3462			/* j-th destination accepts the word character ch. */
3463	0		trtable[ch] = dest_states[j];
3464	0		trtable[ch + SBC_MAX] = dest_states_word[j];
3465			}
3466			}
3467
3468			/* new line */
3469	17	100	if (bitset_contain (aTHX_ acceptable, NEWLINE_CHAR))
3470			{
3471			/* The current state accepts newline character. */
3472	2	50	for (j = 0; j < ndests; ++j)
3473	2	50	if (bitset_contain (aTHX_ dests_ch[j], NEWLINE_CHAR))
3474			{
3475			/* k-th destination accepts newline character. */
3476	2		trtable[NEWLINE_CHAR] = dest_states_nl[j];
3477	2	50	if (need_word_trtable)
3478	0		trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3479			/* There must be only one destination which accepts
3480			newline. See group_nodes_into_DFAstates. */
3481	2		break;
3482			}
3483			}
3484
3485	17	50	if (dest_states_malloced)
3486	17	50	re_free (dest_states);
3487
3488	17	50	re_node_set_free (&follows);
3489	34	100	for (i = 0; i < ndests; ++i)
3490	17	50	re_node_set_free (dests_node + i);
3491
3492	17	50	if (dests_node_malloced)
3493	17	50	re_free (dests_alloc);
3494
3495	19		return true;
3496			}
3497
3498			/* Group all nodes belonging to STATE into several destinations.
3499			Then for all destinations, set the nodes belonging to the destination
3500			to DESTS_NODE[i] and set the characters accepted by the destination
3501			to DEST_CH[i]. This function return the number of destinations. */
3502
3503			static Idx
3504			internal_function
3505	19		group_nodes_into_DFAstates (pTHX_ const re_dfa_t dfa, const re_dfastate_t state,
3506			re_node_set dests_node, bitset_t dests_ch)
3507			{
3508			reg_errcode_t err;
3509			bool ok;
3510			Idx i, j, k;
3511			Idx ndests; /* Number of the destinations from 'state'. */
3512			bitset_t accepts; /* Characters a node can accept. */
3513	19		const re_node_set *cur_nodes = &state->nodes;
3514	19		bitset_empty (aTHX_ accepts);
3515	19		ndests = 0;
3516
3517			/* For all the nodes belonging to 'state', */
3518	54	100	for (i = 0; i < cur_nodes->nelem; ++i)
3519			{
3520	35		re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3521	35		re_token_type_t type = node->type;
3522	35		unsigned int constraint = node->constraint;
3523
3524			/* Enumerate all single byte character this node can accept. */
3525	35	100	if (type == CHARACTER)
3526	15		bitset_set (aTHX_ accepts, node->opr.c);
3527	20	100	else if (type == SIMPLE_BRACKET)
3528			{
3529	2		bitset_merge (aTHX_ accepts, node->opr.sbcset);
3530			}
3531	18	50	else if (type == OP_PERIOD)
3532			{
3533			#ifdef RE_ENABLE_I18N
3534	0	0	if (dfa->mb_cur_max > 1)
3535	0		bitset_merge (aTHX_ accepts, dfa->sb_char);
3536			else
3537			#endif
3538	0		bitset_set_all (aTHX_ accepts);
3539	0	0	if (!(dfa->syntax & RE_DOT_NEWLINE))
3540	0		bitset_clear (accepts, '\n');
3541	0	0	if (dfa->syntax & RE_DOT_NOT_NULL)
3542	0		bitset_clear (accepts, '\0');
3543			}
3544			#ifdef RE_ENABLE_I18N
3545	18	50	else if (type == OP_UTF8_PERIOD)
3546			{
3547			if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3548	0		memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3549			else
3550			bitset_merge (aTHX_ accepts, utf8_sb_map);
3551	0	0	if (!(dfa->syntax & RE_DOT_NEWLINE))
3552	0		bitset_clear (accepts, '\n');
3553	0	0	if (dfa->syntax & RE_DOT_NOT_NULL)
3554	0		bitset_clear (accepts, '\0');
3555			}
3556			#endif
3557			else
3558	18		continue;
3559
3560			/* Check the 'accepts' and sift the characters which are not
3561			match it the context. */
3562	17	50	if (constraint)
3563			{
3564	0	0	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3565			{
3566	0		bool accepts_newline = bitset_contain (aTHX_ accepts, NEWLINE_CHAR);
3567	0		bitset_empty (aTHX_ accepts);
3568	0	0	if (accepts_newline)
3569	0		bitset_set (aTHX_ accepts, NEWLINE_CHAR);
3570			else
3571	0		continue;
3572			}
3573	0	0	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3574			{
3575	0		bitset_empty (aTHX_ accepts);
3576	0		continue;
3577			}
3578
3579	0	0	if (constraint & NEXT_WORD_CONSTRAINT)
3580			{
3581	0		bitset_word_t any_set = 0;
3582	0	0	if (type == CHARACTER && !node->word_char)
		0
3583			{
3584	0		bitset_empty (aTHX_ accepts);
3585	0		continue;
3586			}
3587			#ifdef RE_ENABLE_I18N
3588	0	0	if (dfa->mb_cur_max > 1)
3589	0	0	for (j = 0; j < BITSET_WORDS; ++j)
3590	0		any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3591			else
3592			#endif
3593	0	0	for (j = 0; j < BITSET_WORDS; ++j)
3594	0		any_set \|= (accepts[j] &= dfa->word_char[j]);
3595	0	0	if (!any_set)
3596	0		continue;
3597			}
3598	0	0	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3599			{
3600	0		bitset_word_t any_set = 0;
3601	0	0	if (type == CHARACTER && node->word_char)
		0
3602			{
3603	0		bitset_empty (aTHX_ accepts);
3604	0		continue;
3605			}
3606			#ifdef RE_ENABLE_I18N
3607	0	0	if (dfa->mb_cur_max > 1)
3608	0	0	for (j = 0; j < BITSET_WORDS; ++j)
3609	0		any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3610			else
3611			#endif
3612	0	0	for (j = 0; j < BITSET_WORDS; ++j)
3613	0		any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3614	0	0	if (!any_set)
3615	0		continue;
3616			}
3617			}
3618
3619			/* Then divide 'accepts' into DFA states, or create a new
3620			state. Above, we make sure that accepts is not empty. */
3621	17	50	for (j = 0; j < ndests; ++j)
3622			{
3623			bitset_t intersec; /* Intersection sets, see below. */
3624			bitset_t remains;
3625			/* Flags, see below. */
3626			bitset_word_t has_intersec, not_subset, not_consumed;
3627
3628			/* Optimization, skip if this state doesn't accept the character. */
3629	0	0	if (type == CHARACTER && !bitset_contain (aTHX_ dests_ch[j], node->opr.c))
		0
3630	0		continue;
3631
3632			/* Enumerate the intersection set of this state and 'accepts'. */
3633	0		has_intersec = 0;
3634	0	0	for (k = 0; k < BITSET_WORDS; ++k)
3635	0		has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3636			/* And skip if the intersection set is empty. */
3637	0	0	if (!has_intersec)
3638	0		continue;
3639
3640			/* Then check if this state is a subset of 'accepts'. */
3641	0		not_subset = not_consumed = 0;
3642	0	0	for (k = 0; k < BITSET_WORDS; ++k)
3643			{
3644	0		not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3645	0		not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3646			}
3647
3648			/* If this state isn't a subset of 'accepts', create a
3649			new group state, which has the 'remains'. */
3650	0	0	if (not_subset)
3651			{
3652	0		bitset_copy (aTHX_ dests_ch[ndests], remains);
3653	0		bitset_copy (aTHX_ dests_ch[j], intersec);
3654	0		err = re_node_set_init_copy (aTHX_ dests_node + ndests, &dests_node[j]);
3655	0	0	if (BE (err != REG_NOERROR, 0))
3656	0		goto error_return;
3657	0		++ndests;
3658			}
3659
3660			/* Put the position in the current group. */
3661	0		ok = re_node_set_insert (aTHX_ &dests_node[j], cur_nodes->elems[i]);
3662	0	0	if (BE (! ok, 0))
3663	0		goto error_return;
3664
3665			/* If all characters are consumed, go to next node. */
3666	0	0	if (!not_consumed)
3667	0		break;
3668			}
3669			/* Some characters remain, create a new group. */
3670	17	50	if (j == ndests)
3671			{
3672	17		bitset_copy (aTHX_ dests_ch[ndests], accepts);
3673	17		err = re_node_set_init_1 (aTHX_ dests_node + ndests, cur_nodes->elems[i]);
3674	17	50	if (BE (err != REG_NOERROR, 0))
3675	0		goto error_return;
3676	17		++ndests;
3677	17		bitset_empty (aTHX_ accepts);
3678			}
3679			}
3680	19		return ndests;
3681			error_return:
3682	0	0	for (j = 0; j < ndests; ++j)
3683	0	0	re_node_set_free (dests_node + j);
3684	19		return REG_MISSING;
3685			}
3686
3687			#ifdef RE_ENABLE_I18N
3688			/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3689			Return the number of the bytes the node accepts.
3690			STR_IDX is the current index of the input string.
3691
3692			This function handles the nodes which can accept one character, or
3693			one collating element like '.', '[a-z]', opposite to the other nodes
3694			can only accept one byte. */
3695
3696			static int
3697			internal_function
3698	35		check_node_accept_bytes (pTHX_ const re_dfa_t *dfa, Idx node_idx,
3699			const re_string_t input, SV sv, Idx str_idx)
3700			{
3701	35		const re_token_t *node = dfa->nodes + node_idx;
3702			int char_len, elem_len;
3703			Idx i;
3704
3705	35	50	if (BE (node->type == OP_UTF8_PERIOD, 0))
3706			{
3707	0		unsigned char c = re_string_byte_at (input, str_idx), d;
3708	0	0	if (BE (c < 0xc2, 1))
3709	0		return 0;
3710
3711	0	0	if (str_idx + 2 > input->len)
3712	0		return 0;
3713
3714	0		d = re_string_byte_at (input, str_idx + 1);
3715	0	0	if (c < 0xe0)
3716	0	0	return (d < 0x80 \|\| d > 0xbf) ? 0 : 2;
		0
3717	0	0	else if (c < 0xf0)
3718			{
3719	0		char_len = 3;
3720	0	0	if (c == 0xe0 && d < 0xa0)
		0
3721	0		return 0;
3722			}
3723	0	0	else if (c < 0xf8)
3724			{
3725	0		char_len = 4;
3726	0	0	if (c == 0xf0 && d < 0x90)
		0
3727	0		return 0;
3728			}
3729	0	0	else if (c < 0xfc)
3730			{
3731	0		char_len = 5;
3732	0	0	if (c == 0xf8 && d < 0x88)
		0
3733	0		return 0;
3734			}
3735	0	0	else if (c < 0xfe)
3736			{
3737	0		char_len = 6;
3738	0	0	if (c == 0xfc && d < 0x84)
		0
3739	0		return 0;
3740			}
3741			else
3742	0		return 0;
3743
3744	0	0	if (str_idx + char_len > input->len)
3745	0		return 0;
3746
3747	0	0	for (i = 1; i < char_len; ++i)
3748			{
3749	0		d = re_string_byte_at (input, str_idx + i);
3750	0	0	if (d < 0x80 \|\| d > 0xbf)
		0
3751	0		return 0;
3752			}
3753	0		return char_len;
3754			}
3755
3756	35		char_len = re_string_char_size_at (aTHX_ input, str_idx);
3757	35	50	if (node->type == OP_PERIOD)
3758			{
3759	0	0	if (char_len <= 1)
3760	0		return 0;
3761			/* FIXME: I don't think this if is needed, as both '\n'
3762			and '\0' are char_len == 1. */
3763			/* '.' accepts any one character except the following two cases. */
3764	0	0	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
		0
3765	0	0	re_string_byte_at (input, str_idx) == '\n') \|\|
3766	0	0	((dfa->syntax & RE_DOT_NOT_NULL) &&
3767	0		re_string_byte_at (input, str_idx) == '\0'))
3768	0		return 0;
3769	0		return char_len;
3770			}
3771
3772	35		elem_len = re_string_elem_size_at (aTHX_ input, sv, str_idx);
3773	35	100	if ((elem_len <= 1 && char_len <= 1) \|\| char_len == 0)
		50
		50
3774	1		return 0;
3775
3776	34	50	if (node->type == COMPLEX_BRACKET)
3777			{
3778	34		const re_charset_t *cset = node->opr.mbcset;
3779			# ifdef _LIBC
3780			const unsigned char *pin
3781			= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3782			Idx j;
3783			uint32_t nrules;
3784			# endif /* _LIBC */
3785	34		int match_len = 0;
3786	34	50	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
		50
		50
3787	0		? re_string_wchar_at (aTHX_ input, str_idx) : 0);
3788
3789			/* match with multibyte character? */
3790	34	50	for (i = 0; i < cset->nmbchars; ++i)
3791	0	0	if (wc == cset->mbchars[i])
3792			{
3793	0		match_len = char_len;
3794	0		goto check_node_accept_bytes_match;
3795			}
3796			/* match with character_class? */
3797	34	50	for (i = 0; i < cset->nchar_classes; ++i)
3798			{
3799	0		rpl__wctype_t wt = cset->char_classes[i];
3800	0	0	if (rpl__iswctype (wc, wt))
3801			{
3802	0		match_len = char_len;
3803	0		goto check_node_accept_bytes_match;
3804			}
3805			}
3806
3807			# ifdef _LIBC
3808			nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3809			if (nrules != 0)
3810			{
3811			unsigned int in_collseq = 0;
3812			const int32_t table, indirect;
3813			const unsigned char weights, extra;
3814			const char *collseqwc;
3815			/* This #include defines a local function! */
3816			# include
3817
3818			/* match with collating_symbol? */
3819			if (cset->ncoll_syms)
3820			extra = (const unsigned char *)
3821			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3822			for (i = 0; i < cset->ncoll_syms; ++i)
3823			{
3824			const unsigned char *coll_sym = extra + cset->coll_syms[i];
3825			/* Compare the length of input collating element and
3826			the length of current collating element. */
3827			if (*coll_sym != elem_len)
3828			continue;
3829			/* Compare each bytes. */
3830			for (j = 0; j < *coll_sym; j++)
3831			if (pin[j] != coll_sym[1 + j])
3832			break;
3833			if (j == *coll_sym)
3834			{
3835			/* Match if every bytes is equal. */
3836			match_len = j;
3837			goto check_node_accept_bytes_match;
3838			}
3839			}
3840
3841			if (cset->nranges)
3842			{
3843			if (elem_len <= char_len)
3844			{
3845			collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3846			in_collseq = __collseq_table_lookup (collseqwc, wc);
3847			}
3848			else
3849			in_collseq = find_collation_sequence_value (pin, elem_len);
3850			}
3851			/* match with range expression? */
3852			/* FIXME: Implement rational ranges here, too. */
3853			for (i = 0; i < cset->nranges; ++i)
3854			if (cset->range_starts[i] <= in_collseq
3855			&& in_collseq <= cset->range_ends[i])
3856			{
3857			match_len = elem_len;
3858			goto check_node_accept_bytes_match;
3859			}
3860
3861			/* match with equivalence_class? */
3862			if (cset->nequiv_classes)
3863			{
3864			const unsigned char *cp = pin;
3865			table = (const int32_t *)
3866			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3867			weights = (const unsigned char *)
3868			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3869			extra = (const unsigned char *)
3870			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3871			indirect = (const int32_t *)
3872			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3873			int32_t idx = findidx (&cp, elem_len);
3874			if (idx > 0)
3875			for (i = 0; i < cset->nequiv_classes; ++i)
3876			{
3877			int32_t equiv_class_idx = cset->equiv_classes[i];
3878			size_t weight_len = weights[idx & 0xffffff];
3879			if (weight_len == weights[equiv_class_idx & 0xffffff]
3880			&& (idx >> 24) == (equiv_class_idx >> 24))
3881			{
3882			Idx cnt = 0;
3883
3884			idx &= 0xffffff;
3885			equiv_class_idx &= 0xffffff;
3886
3887			while (cnt <= weight_len
3888			&& (weights[equiv_class_idx + 1 + cnt]
3889			== weights[idx + 1 + cnt]))
3890			++cnt;
3891			if (cnt > weight_len)
3892			{
3893			match_len = elem_len;
3894			goto check_node_accept_bytes_match;
3895			}
3896			}
3897			}
3898			}
3899			}
3900			else
3901			# endif /* _LIBC */
3902			{
3903			/* match with range expression? */
3904	34	50	for (i = 0; i < cset->nranges; ++i)
3905			{
3906	0	0	if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
		0
3907			{
3908	0		match_len = char_len;
3909	0		goto check_node_accept_bytes_match;
3910			}
3911			}
3912			}
3913			check_node_accept_bytes_match:
3914	34	50	if (!cset->non_match)
3915	0		return match_len;
3916			else
3917			{
3918	34	50	if (match_len > 0)
3919	0		return 0;
3920			else
3921	34		return (elem_len > char_len) ? elem_len : char_len;
3922			}
3923			}
3924	0		return 0;
3925			}
3926
3927			# ifdef _LIBC
3928			static unsigned int
3929			internal_function
3930			find_collation_sequence_value (pTHX_ const unsigned char *mbs, size_t mbs_len)
3931			{
3932			uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3933			if (nrules == 0)
3934			{
3935			if (mbs_len == 1)
3936			{
3937			/* No valid character. Match it as a single byte character. */
3938			const unsigned char collseq = (const unsigned char )
3939			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3940			return collseq[mbs[0]];
3941			}
3942			return UINT_MAX;
3943			}
3944			else
3945			{
3946			int32_t idx;
3947			const unsigned char extra = (const unsigned char )
3948			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3949			int32_t extrasize = (const unsigned char *)
3950			_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3951
3952			for (idx = 0; idx < extrasize;)
3953			{
3954			int mbs_cnt;
3955			bool found = false;
3956			int32_t elem_mbs_len;
3957			/* Skip the name of collating element name. */
3958			idx = idx + extra[idx] + 1;
3959			elem_mbs_len = extra[idx++];
3960			if (mbs_len == elem_mbs_len)
3961			{
3962			for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3963			if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3964			break;
3965			if (mbs_cnt == elem_mbs_len)
3966			/* Found the entry. */
3967			found = true;
3968			}
3969			/* Skip the byte sequence of the collating element. */
3970			idx += elem_mbs_len;
3971			/* Adjust for the alignment. */
3972			idx = (idx + 3) & ~3;
3973			/* Skip the collation sequence value. */
3974			idx += sizeof (uint32_t);
3975			/* Skip the wide char sequence of the collating element. */
3976			idx = idx + sizeof (uint32_t) * ((int32_t ) (extra + idx) + 1);
3977			/* If we found the entry, return the sequence value. */
3978			if (found)
3979			return (uint32_t ) (extra + idx);
3980			/* Skip the collation sequence value. */
3981			idx += sizeof (uint32_t);
3982			}
3983			return UINT_MAX;
3984			}
3985			}
3986			# endif /* _LIBC */
3987			#endif /* RE_ENABLE_I18N */
3988
3989			/* Check whether the node accepts the byte which is IDX-th
3990			byte of the INPUT. */
3991
3992			static bool
3993			internal_function
3994	23		check_node_accept (pTHX_ const re_match_context_t mctx, const re_token_t node,
3995			Idx idx)
3996			{
3997			unsigned char ch;
3998	23		ch = re_string_byte_at (&mctx->input, idx);
3999	23		switch (node->type)
4000			{
4001			case CHARACTER:
4002	14	50	if (node->opr.c != ch)
4003	0		return false;
4004	14		break;
4005
4006			case SIMPLE_BRACKET:
4007	9	50	if (!bitset_contain (aTHX_ node->opr.sbcset, ch))
4008	9		return false;
4009	0		break;
4010
4011			#ifdef RE_ENABLE_I18N
4012			case OP_UTF8_PERIOD:
4013	0	0	if (ch >= ASCII_CHARS)
4014	0		return false;
4015			/* FALLTHROUGH */
4016			#endif
4017			case OP_PERIOD:
4018	0	0	if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
		0
4019	0	0	\|\| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
		0
4020	0		return false;
4021	0		break;
4022
4023			default:
4024	0		return false;
4025			}
4026
4027	14	50	if (node->constraint)
4028			{
4029			/* The node has constraints. Check whether the current context
4030			satisfies the constraints. */
4031	0		unsigned int context = re_string_context_at (aTHX_ &mctx->input, idx,
4032			mctx->eflags);
4033	0	0	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
		0
		0
		0
		0
		0
		0
		0
4034	0		return false;
4035			}
4036
4037	14		return true;
4038			}
4039
4040			/* Extend the buffers, if the buffers have run out. */
4041
4042			static reg_errcode_t
4043			internal_function __attribute_warn_unused_result__
4044	0		extend_buffers (pTHX_ re_match_context_t *mctx, Idx min_len)
4045			{
4046			reg_errcode_t ret;
4047	0		re_string_t *pstr = &mctx->input;
4048
4049			/* Avoid overflow. */
4050	0	0	if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
4051			<= pstr->bufs_len, 0))
4052	0		return REG_ESPACE;
4053
4054			/* Double the lengths of the buffers, but allocate at least MIN_LEN. */
4055	0		ret = re_string_realloc_buffers (aTHX_ pstr,
4056	0		MAX (min_len,
4057			MIN (pstr->len, pstr->bufs_len * 2)));
4058	0	0	if (BE (ret != REG_NOERROR, 0))
4059	0		return ret;
4060
4061	0	0	if (mctx->state_log != NULL)
4062			{
4063			/* And double the length of state_log. */
4064			/* XXX We have no indication of the size of this buffer. If this
4065			allocation fail we have no indication that the state_log array
4066			does not have the right size. */
4067	0	0	re_realloc (mctx->state_log, re_dfastate_t *, pstr->bufs_len + 1);
4068			}
4069
4070			/* Then reconstruct the buffers. */
4071	0	0	if (pstr->icase)
4072			{
4073			#ifdef RE_ENABLE_I18N
4074	0	0	if (pstr->mb_cur_max > 1)
4075			{
4076	0		ret = build_wcs_upper_buffer (aTHX_ pstr);
4077	0	0	if (BE (ret != REG_NOERROR, 0))
4078	0		return ret;
4079			}
4080			else
4081			#endif /* RE_ENABLE_I18N */
4082	0		build_upper_buffer (aTHX_ pstr);
4083			}
4084			else
4085			{
4086			#ifdef RE_ENABLE_I18N
4087	0	0	if (pstr->mb_cur_max > 1)
4088	0		build_wcs_buffer (aTHX_ pstr);
4089			else
4090			#endif /* RE_ENABLE_I18N */
4091			{
4092	0	0	if (pstr->trans != NULL)
4093	0		re_string_translate_buffer (aTHX_ pstr);
4094			}
4095			}
4096	0		return REG_NOERROR;
4097			}
4098
4099
4100			/* Functions for matching context. */
4101
4102			/* Initialize MCTX. */
4103
4104			static reg_errcode_t
4105			internal_function __attribute_warn_unused_result__
4106	14		match_ctx_init (pTHX_ re_match_context_t *mctx, int eflags, Idx n)
4107			{
4108	14		mctx->eflags = eflags;
4109	14		mctx->match_last = REG_MISSING;
4110	14	50	if (n > 0)
4111			{
4112			/* Avoid overflow. */
4113	0		size_t max_object_size =
4114			MAX (sizeof (struct re_backref_cache_entry),
4115			sizeof (re_sub_match_top_t *));
4116	0	0	if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0))
4117	0		return REG_ESPACE;
4118
4119	0	0	re_malloc (mctx->bkref_ents, struct re_backref_cache_entry, n);
4120	0	0	re_malloc (mctx->sub_tops, re_sub_match_top_t *, n);
4121			}
4122			/* Already zero-ed by the caller.
4123			else
4124			mctx->bkref_ents = NULL;
4125			mctx->nbkref_ents = 0;
4126			mctx->nsub_tops = 0; */
4127	14		mctx->abkref_ents = n;
4128	14		mctx->max_mb_elem_len = 1;
4129	14		mctx->asub_tops = n;
4130	14		return REG_NOERROR;
4131			}
4132
4133			/* Clean the entries which depend on the current input in MCTX.
4134			This function must be invoked when the matcher changes the start index
4135			of the input, or changes the input string. */
4136
4137			static void
4138			internal_function
4139	23		match_ctx_clean (pTHX_ re_match_context_t *mctx)
4140			{
4141			Idx st_idx;
4142	23	50	for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4143			{
4144			Idx sl_idx;
4145	0		re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4146	0	0	for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4147			{
4148	0		re_sub_match_last_t *last = top->lasts[sl_idx];
4149	0	0	re_free (last->path.array);
4150	0	0	re_free (last);
4151			}
4152	0	0	re_free (top->lasts);
4153	0	0	if (top->path)
4154			{
4155	0	0	re_free (top->path->array);
4156	0	0	re_free (top->path);
4157			}
4158	0	0	re_free (top);
4159			}
4160
4161	23		mctx->nsub_tops = 0;
4162	23		mctx->nbkref_ents = 0;
4163	23		}
4164
4165			/* Free all the memory associated with MCTX. */
4166
4167			static void
4168			internal_function
4169	0		match_ctx_free (pTHX_ re_match_context_t *mctx)
4170			{
4171			/* First, free all the memory associated with MCTX->SUB_TOPS. */
4172	0		match_ctx_clean (aTHX_ mctx);
4173	0	0	re_free (mctx->sub_tops);
4174	0	0	re_free (mctx->bkref_ents);
4175	0		}
4176
4177			/* Add a new backreference entry to MCTX.
4178			Note that we assume that caller never call this function with duplicate
4179			entry, and call with STR_IDX which isn't smaller than any existing entry.
4180			*/
4181
4182			static reg_errcode_t
4183			internal_function __attribute_warn_unused_result__
4184	0		match_ctx_add_entry (pTHX_ re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4185			Idx to)
4186			{
4187	0	0	if (mctx->nbkref_ents >= mctx->abkref_ents)
4188			{
4189	0	0	re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, mctx->abkref_ents * 2);
4190	0		memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4191	0		sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4192	0		mctx->abkref_ents *= 2;
4193			}
4194	0	0	if (mctx->nbkref_ents > 0
4195	0	0	&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4196	0		mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4197
4198	0		mctx->bkref_ents[mctx->nbkref_ents].node = node;
4199	0		mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4200	0		mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4201	0		mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4202
4203			/* This is a cache that saves negative results of check_dst_limits_calc_pos.
4204			If bit N is clear, means that this entry won't epsilon-transition to
4205			an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4206			it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4207			such node.
4208
4209			A backreference does not epsilon-transition unless it is empty, so set
4210			to all zeros if FROM != TO. */
4211	0		mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4212	0	0	= (from == to ? -1 : 0);
4213
4214	0		mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4215	0	0	if (mctx->max_mb_elem_len < to - from)
4216	0		mctx->max_mb_elem_len = to - from;
4217	0		return REG_NOERROR;
4218			}
4219
4220			/* Return the first entry with the same str_idx, or REG_MISSING if none is
4221			found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4222
4223			static Idx
4224			internal_function
4225	0		search_cur_bkref_entry (pTHX_ const re_match_context_t *mctx, Idx str_idx)
4226			{
4227			Idx left, right, mid, last;
4228	0		last = right = mctx->nbkref_ents;
4229	0	0	for (left = 0; left < right;)
4230			{
4231	0		mid = (left + right) / 2;
4232	0	0	if (mctx->bkref_ents[mid].str_idx < str_idx)
4233	0		left = mid + 1;
4234			else
4235	0		right = mid;
4236			}
4237	0	0	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
		0
4238	0		return left;
4239			else
4240	0		return REG_MISSING;
4241			}
4242
4243			/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4244			at STR_IDX. */
4245
4246			static reg_errcode_t
4247			internal_function __attribute_warn_unused_result__
4248	0		match_ctx_add_subtop (pTHX_ re_match_context_t *mctx, Idx node, Idx str_idx)
4249			{
4250			#ifdef DEBUG
4251			assert (mctx->sub_tops != NULL);
4252			assert (mctx->asub_tops > 0);
4253			#endif
4254	0	0	if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
4255			{
4256	0		Idx new_asub_tops = mctx->asub_tops * 2;
4257	0	0	re_realloc (mctx->sub_tops, re_sub_match_top_t *, new_asub_tops);
4258	0		mctx->asub_tops = new_asub_tops;
4259			}
4260	0		re_calloc(mctx->sub_tops[mctx->nsub_tops], re_sub_match_top_t, 1);
4261	0		mctx->sub_tops[mctx->nsub_tops]->node = node;
4262	0		mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4263	0		return REG_NOERROR;
4264			}
4265
4266			/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4267			at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */
4268
4269			static re_sub_match_last_t *
4270			internal_function
4271	0		match_ctx_add_sublast (pTHX_ re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4272			{
4273			re_sub_match_last_t *new_entry;
4274	0	0	if (BE (subtop->nlasts == subtop->alasts, 0))
4275			{
4276	0		Idx new_alasts = 2 * subtop->alasts + 1;
4277	0	0	re_realloc (subtop->lasts, re_sub_match_last_t *, new_alasts);
4278	0		subtop->alasts = new_alasts;
4279			}
4280	0		re_calloc(new_entry, re_sub_match_last_t, 1);
4281	0		subtop->lasts[subtop->nlasts] = new_entry;
4282	0		new_entry->node = node;
4283	0		new_entry->str_idx = str_idx;
4284	0		++subtop->nlasts;
4285	0		return new_entry;
4286			}
4287
4288			static void
4289			internal_function
4290	9		sift_ctx_init (pTHX_ re_sift_context_t sctx, re_dfastate_t *sifted_sts,
4291			re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4292			{
4293	9		sctx->sifted_states = sifted_sts;
4294	9		sctx->limited_states = limited_sts;
4295	9		sctx->last_node = last_node;
4296	9		sctx->last_str_idx = last_str_idx;
4297	9		re_node_set_init_empty (&sctx->limits);
4298	9		}