Ruby 3.2.1p31 (2023-02-08 revision 31819e82c88c6f8ecfaeb162519bfa26a14b21fd)
regcomp.c
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(void)
37{
38 return OnigDefaultCaseFoldFlag;
39}
40
41extern int
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
114 return ONIG_INFINITE_DISTANCE;
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141// Attempt to right size allocated buffers for a regex post compile
142static void
143onig_reg_resize(regex_t *reg)
144{
145 do {
146 if (!reg->used) {
147 xfree(reg->p);
148 reg->alloc = 0;
149 reg->p = 0;
150 }
151 else if (reg->alloc > reg->used) {
152 unsigned char *new_ptr = xrealloc(reg->p, reg->used);
153 // Skip the right size optimization if memory allocation fails
154 if (new_ptr) {
155 reg->alloc = reg->used;
156 reg->p = new_ptr;
157 }
158 }
159 } while ((reg = reg->chain) != 0);
160}
161
162extern int
163onig_bbuf_init(BBuf* buf, OnigDistance size)
164{
165 if (size <= 0) {
166 size = 0;
167 buf->p = NULL;
168 }
169 else {
170 buf->p = (UChar* )xmalloc(size);
171 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
172 }
173
174 buf->alloc = (unsigned int )size;
175 buf->used = 0;
176 return 0;
177}
178
179
180#ifdef USE_SUBEXP_CALL
181
182static int
183unset_addr_list_init(UnsetAddrList* uslist, int size)
184{
185 UnsetAddr* p;
186
187 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
188 CHECK_NULL_RETURN_MEMERR(p);
189 uslist->num = 0;
190 uslist->alloc = size;
191 uslist->us = p;
192 return 0;
193}
194
195static void
196unset_addr_list_end(UnsetAddrList* uslist)
197{
198 if (IS_NOT_NULL(uslist->us))
199 xfree(uslist->us);
200}
201
202static int
203unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
204{
205 UnsetAddr* p;
206 int size;
207
208 if (uslist->num >= uslist->alloc) {
209 size = uslist->alloc * 2;
210 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
211 CHECK_NULL_RETURN_MEMERR(p);
212 uslist->alloc = size;
213 uslist->us = p;
214 }
215
216 uslist->us[uslist->num].offset = offset;
217 uslist->us[uslist->num].target = node;
218 uslist->num++;
219 return 0;
220}
221#endif /* USE_SUBEXP_CALL */
222
223
224static int
225add_opcode(regex_t* reg, int opcode)
226{
227 BBUF_ADD1(reg, opcode);
228 return 0;
229}
230
231#ifdef USE_COMBINATION_EXPLOSION_CHECK
232static int
233add_state_check_num(regex_t* reg, int num)
234{
235 StateCheckNumType n = (StateCheckNumType )num;
236
237 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
238 return 0;
239}
240#endif
241
242static int
243add_rel_addr(regex_t* reg, int addr)
244{
245 RelAddrType ra = (RelAddrType )addr;
246
247 BBUF_ADD(reg, &ra, SIZE_RELADDR);
248 return 0;
249}
250
251static int
252add_abs_addr(regex_t* reg, int addr)
253{
254 AbsAddrType ra = (AbsAddrType )addr;
255
256 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
257 return 0;
258}
259
260static int
261add_length(regex_t* reg, OnigDistance len)
262{
263 LengthType l = (LengthType )len;
264
265 BBUF_ADD(reg, &l, SIZE_LENGTH);
266 return 0;
267}
268
269static int
270add_mem_num(regex_t* reg, int num)
271{
272 MemNumType n = (MemNumType )num;
273
274 BBUF_ADD(reg, &n, SIZE_MEMNUM);
275 return 0;
276}
277
278#if 0
279static int
280add_pointer(regex_t* reg, void* addr)
281{
282 PointerType ptr = (PointerType )addr;
283
284 BBUF_ADD(reg, &ptr, SIZE_POINTER);
285 return 0;
286}
287#endif
288
289static int
290add_option(regex_t* reg, OnigOptionType option)
291{
292 BBUF_ADD(reg, &option, SIZE_OPTION);
293 return 0;
294}
295
296static int
297add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
298{
299 int r;
300
301 r = add_opcode(reg, opcode);
302 if (r) return r;
303 r = add_rel_addr(reg, addr);
304 return r;
305}
306
307static int
308add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
309{
310 BBUF_ADD(reg, bytes, len);
311 return 0;
312}
313
314static int
315add_bitset(regex_t* reg, BitSetRef bs)
316{
317 BBUF_ADD(reg, bs, SIZE_BITSET);
318 return 0;
319}
320
321static int
322add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
323{
324 int r;
325
326 r = add_opcode(reg, opcode);
327 if (r) return r;
328 r = add_option(reg, option);
329 return r;
330}
331
332static int compile_length_tree(Node* node, regex_t* reg);
333static int compile_tree(Node* node, regex_t* reg);
334
335
336#define IS_NEED_STR_LEN_OP_EXACT(op) \
337 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
338 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
339
340static int
341select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
342{
343 int op;
344 OnigDistance str_len = roomof(byte_len, mb_len);
345
346 if (ignore_case) {
347 switch (str_len) {
348 case 1: op = OP_EXACT1_IC; break;
349 default: op = OP_EXACTN_IC; break;
350 }
351 }
352 else {
353 switch (mb_len) {
354 case 1:
355 switch (str_len) {
356 case 1: op = OP_EXACT1; break;
357 case 2: op = OP_EXACT2; break;
358 case 3: op = OP_EXACT3; break;
359 case 4: op = OP_EXACT4; break;
360 case 5: op = OP_EXACT5; break;
361 default: op = OP_EXACTN; break;
362 }
363 break;
364
365 case 2:
366 switch (str_len) {
367 case 1: op = OP_EXACTMB2N1; break;
368 case 2: op = OP_EXACTMB2N2; break;
369 case 3: op = OP_EXACTMB2N3; break;
370 default: op = OP_EXACTMB2N; break;
371 }
372 break;
373
374 case 3:
375 op = OP_EXACTMB3N;
376 break;
377
378 default:
379 op = OP_EXACTMBN;
380 break;
381 }
382 }
383 return op;
384}
385
386static int
387compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
388{
389 int r;
390 int saved_num_null_check = reg->num_null_check;
391
392 if (empty_info != 0) {
393 r = add_opcode(reg, OP_NULL_CHECK_START);
394 if (r) return r;
395 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
396 if (r) return r;
397 reg->num_null_check++;
398 }
399
400 r = compile_tree(node, reg);
401 if (r) return r;
402
403 if (empty_info != 0) {
404 if (empty_info == NQ_TARGET_IS_EMPTY)
405 r = add_opcode(reg, OP_NULL_CHECK_END);
406 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
407 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
408 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
409 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
410
411 if (r) return r;
412 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
413 }
414 return r;
415}
416
417#ifdef USE_SUBEXP_CALL
418static int
419compile_call(CallNode* node, regex_t* reg)
420{
421 int r;
422
423 r = add_opcode(reg, OP_CALL);
424 if (r) return r;
425 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
426 node->target);
427 if (r) return r;
428 r = add_abs_addr(reg, 0 /*dummy addr.*/);
429 return r;
430}
431#endif
432
433static int
434compile_tree_n_times(Node* node, int n, regex_t* reg)
435{
436 int i, r;
437
438 for (i = 0; i < n; i++) {
439 r = compile_tree(node, reg);
440 if (r) return r;
441 }
442 return 0;
443}
444
445static int
446add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
447 regex_t* reg ARG_UNUSED, int ignore_case)
448{
449 int len;
450 int op = select_str_opcode(mb_len, byte_len, ignore_case);
451
452 len = SIZE_OPCODE;
453
454 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
455 if (IS_NEED_STR_LEN_OP_EXACT(op))
456 len += SIZE_LENGTH;
457
458 len += (int )byte_len;
459 return len;
460}
461
462static int
463add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
464 regex_t* reg, int ignore_case)
465{
466 int op = select_str_opcode(mb_len, byte_len, ignore_case);
467 add_opcode(reg, op);
468
469 if (op == OP_EXACTMBN)
470 add_length(reg, mb_len);
471
472 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
473 if (op == OP_EXACTN_IC)
474 add_length(reg, byte_len);
475 else
476 add_length(reg, byte_len / mb_len);
477 }
478
479 add_bytes(reg, s, byte_len);
480 return 0;
481}
482
483
484static int
485compile_length_string_node(Node* node, regex_t* reg)
486{
487 int rlen, r, len, prev_len, blen, ambig;
488 OnigEncoding enc = reg->enc;
489 UChar *p, *prev;
490 StrNode* sn;
491
492 sn = NSTR(node);
493 if (sn->end <= sn->s)
494 return 0;
495
496 ambig = NSTRING_IS_AMBIG(node);
497
498 p = prev = sn->s;
499 prev_len = enclen(enc, p, sn->end);
500 p += prev_len;
501 blen = prev_len;
502 rlen = 0;
503
504 for (; p < sn->end; ) {
505 len = enclen(enc, p, sn->end);
506 if (len == prev_len || ambig) {
507 blen += len;
508 }
509 else {
510 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
511 rlen += r;
512 prev = p;
513 blen = len;
514 prev_len = len;
515 }
516 p += len;
517 }
518 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
519 rlen += r;
520 return rlen;
521}
522
523static int
524compile_length_string_raw_node(StrNode* sn, regex_t* reg)
525{
526 if (sn->end <= sn->s)
527 return 0;
528
529 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
530}
531
532static int
533compile_string_node(Node* node, regex_t* reg)
534{
535 int r, len, prev_len, blen, ambig;
536 OnigEncoding enc = reg->enc;
537 UChar *p, *prev, *end;
538 StrNode* sn;
539
540 sn = NSTR(node);
541 if (sn->end <= sn->s)
542 return 0;
543
544 end = sn->end;
545 ambig = NSTRING_IS_AMBIG(node);
546
547 p = prev = sn->s;
548 prev_len = enclen(enc, p, end);
549 p += prev_len;
550 blen = prev_len;
551
552 for (; p < end; ) {
553 len = enclen(enc, p, end);
554 if (len == prev_len || ambig) {
555 blen += len;
556 }
557 else {
558 r = add_compile_string(prev, prev_len, blen, reg, ambig);
559 if (r) return r;
560
561 prev = p;
562 blen = len;
563 prev_len = len;
564 }
565
566 p += len;
567 }
568 return add_compile_string(prev, prev_len, blen, reg, ambig);
569}
570
571static int
572compile_string_raw_node(StrNode* sn, regex_t* reg)
573{
574 if (sn->end <= sn->s)
575 return 0;
576
577 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
578}
579
580static int
581add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
582{
583#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
584 add_length(reg, mbuf->used);
585 return add_bytes(reg, mbuf->p, mbuf->used);
586#else
587 int r, pad_size;
588 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
589
590 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
591 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
592 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
593
594 r = add_bytes(reg, mbuf->p, mbuf->used);
595
596 /* padding for return value from compile_length_cclass_node() to be fix. */
597 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
598 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
599 return r;
600#endif
601}
602
603static int
604compile_length_cclass_node(CClassNode* cc, regex_t* reg)
605{
606 int len;
607
608 if (IS_NULL(cc->mbuf)) {
609 len = SIZE_OPCODE + SIZE_BITSET;
610 }
611 else {
612 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
613 len = SIZE_OPCODE;
614 }
615 else {
616 len = SIZE_OPCODE + SIZE_BITSET;
617 }
618#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
619 len += SIZE_LENGTH + cc->mbuf->used;
620#else
621 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
622#endif
623 }
624
625 return len;
626}
627
628static int
629compile_cclass_node(CClassNode* cc, regex_t* reg)
630{
631 int r;
632
633 if (IS_NULL(cc->mbuf)) {
634 if (IS_NCCLASS_NOT(cc))
635 add_opcode(reg, OP_CCLASS_NOT);
636 else
637 add_opcode(reg, OP_CCLASS);
638
639 r = add_bitset(reg, cc->bs);
640 }
641 else {
642 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
643 if (IS_NCCLASS_NOT(cc))
644 add_opcode(reg, OP_CCLASS_MB_NOT);
645 else
646 add_opcode(reg, OP_CCLASS_MB);
647
648 r = add_multi_byte_cclass(cc->mbuf, reg);
649 }
650 else {
651 if (IS_NCCLASS_NOT(cc))
652 add_opcode(reg, OP_CCLASS_MIX_NOT);
653 else
654 add_opcode(reg, OP_CCLASS_MIX);
655
656 r = add_bitset(reg, cc->bs);
657 if (r) return r;
658 r = add_multi_byte_cclass(cc->mbuf, reg);
659 }
660 }
661
662 return r;
663}
664
665static int
666entry_repeat_range(regex_t* reg, int id, int lower, int upper)
667{
668#define REPEAT_RANGE_ALLOC 4
669
671
672 if (reg->repeat_range_alloc == 0) {
673 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
674 CHECK_NULL_RETURN_MEMERR(p);
675 reg->repeat_range = p;
676 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
677 }
678 else if (reg->repeat_range_alloc <= id) {
679 int n;
680 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
681 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
682 sizeof(OnigRepeatRange) * n);
683 CHECK_NULL_RETURN_MEMERR(p);
684 reg->repeat_range = p;
685 reg->repeat_range_alloc = n;
686 }
687 else {
688 p = reg->repeat_range;
689 }
690
691 p[id].lower = lower;
692 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
693 return 0;
694}
695
696static int
697compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
698 regex_t* reg)
699{
700 int r;
701 int num_repeat = reg->num_repeat;
702
703 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
704 if (r) return r;
705 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
706 reg->num_repeat++;
707 if (r) return r;
708 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
709 if (r) return r;
710
711 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
712 if (r) return r;
713
714 r = compile_tree_empty_check(qn->target, reg, empty_info);
715 if (r) return r;
716
717 if (
718#ifdef USE_SUBEXP_CALL
719 reg->num_call > 0 ||
720#endif
721 IS_QUANTIFIER_IN_REPEAT(qn)) {
722 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
723 }
724 else {
725 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
726 }
727 if (r) return r;
728 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
729 return r;
730}
731
732static int
733is_anychar_star_quantifier(QtfrNode* qn)
734{
735 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
736 NTYPE(qn->target) == NT_CANY)
737 return 1;
738 else
739 return 0;
740}
741
742#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
743#define CKN_ON (ckn > 0)
744
745#ifdef USE_COMBINATION_EXPLOSION_CHECK
746
747static int
748compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
749{
750 int len, mod_tlen, cklen;
751 int ckn;
752 int infinite = IS_REPEAT_INFINITE(qn->upper);
753 int empty_info = qn->target_empty_info;
754 int tlen = compile_length_tree(qn->target, reg);
755
756 if (tlen < 0) return tlen;
757
758 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
759
760 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
761
762 /* anychar repeat */
763 if (NTYPE(qn->target) == NT_CANY) {
764 if (qn->greedy && infinite) {
765 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
766 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
767 else
768 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
769 }
770 }
771
772 if (empty_info != 0)
773 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
774 else
775 mod_tlen = tlen;
776
777 if (infinite && qn->lower <= 1) {
778 if (qn->greedy) {
779 if (qn->lower == 1)
780 len = SIZE_OP_JUMP;
781 else
782 len = 0;
783
784 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
785 }
786 else {
787 if (qn->lower == 0)
788 len = SIZE_OP_JUMP;
789 else
790 len = 0;
791
792 len += mod_tlen + SIZE_OP_PUSH + cklen;
793 }
794 }
795 else if (qn->upper == 0) {
796 if (qn->is_referred != 0) /* /(?<n>..){0}/ */
797 len = SIZE_OP_JUMP + tlen;
798 else
799 len = 0;
800 }
801 else if (qn->upper == 1 && qn->greedy) {
802 if (qn->lower == 0) {
803 if (CKN_ON) {
804 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
805 }
806 else {
807 len = SIZE_OP_PUSH + tlen;
808 }
809 }
810 else {
811 len = tlen;
812 }
813 }
814 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
815 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
816 }
817 else {
818 len = SIZE_OP_REPEAT_INC
819 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
820 if (CKN_ON)
821 len += SIZE_OP_STATE_CHECK;
822 }
823
824 return len;
825}
826
827static int
828compile_quantifier_node(QtfrNode* qn, regex_t* reg)
829{
830 int r, mod_tlen;
831 int ckn;
832 int infinite = IS_REPEAT_INFINITE(qn->upper);
833 int empty_info = qn->target_empty_info;
834 int tlen = compile_length_tree(qn->target, reg);
835
836 if (tlen < 0) return tlen;
837
838 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
839
840 if (is_anychar_star_quantifier(qn)) {
841 r = compile_tree_n_times(qn->target, qn->lower, reg);
842 if (r) return r;
843 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
844 if (IS_MULTILINE(reg->options))
845 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
846 else
847 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
848 if (r) return r;
849 if (CKN_ON) {
850 r = add_state_check_num(reg, ckn);
851 if (r) return r;
852 }
853
854 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
855 }
856 else {
857 if (IS_MULTILINE(reg->options)) {
858 r = add_opcode(reg, (CKN_ON ?
859 OP_STATE_CHECK_ANYCHAR_ML_STAR
860 : OP_ANYCHAR_ML_STAR));
861 }
862 else {
863 r = add_opcode(reg, (CKN_ON ?
864 OP_STATE_CHECK_ANYCHAR_STAR
865 : OP_ANYCHAR_STAR));
866 }
867 if (r) return r;
868 if (CKN_ON)
869 r = add_state_check_num(reg, ckn);
870
871 return r;
872 }
873 }
874
875 if (empty_info != 0)
876 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
877 else
878 mod_tlen = tlen;
879
880 if (infinite && qn->lower <= 1) {
881 if (qn->greedy) {
882 if (qn->lower == 1) {
883 r = add_opcode_rel_addr(reg, OP_JUMP,
884 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
885 if (r) return r;
886 }
887
888 if (CKN_ON) {
889 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
890 if (r) return r;
891 r = add_state_check_num(reg, ckn);
892 if (r) return r;
893 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
894 }
895 else {
896 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
897 }
898 if (r) return r;
899 r = compile_tree_empty_check(qn->target, reg, empty_info);
900 if (r) return r;
901 r = add_opcode_rel_addr(reg, OP_JUMP,
902 -(mod_tlen + (int )SIZE_OP_JUMP
903 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
904 }
905 else {
906 if (qn->lower == 0) {
907 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
908 if (r) return r;
909 }
910 r = compile_tree_empty_check(qn->target, reg, empty_info);
911 if (r) return r;
912 if (CKN_ON) {
913 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
914 if (r) return r;
915 r = add_state_check_num(reg, ckn);
916 if (r) return r;
917 r = add_rel_addr(reg,
918 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
919 }
920 else
921 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
922 }
923 }
924 else if (qn->upper == 0) {
925 if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
926 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
927 if (r) return r;
928 r = compile_tree(qn->target, reg);
929 }
930 else
931 r = 0;
932 }
933 else if (qn->upper == 1 && qn->greedy) {
934 if (qn->lower == 0) {
935 if (CKN_ON) {
936 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
937 if (r) return r;
938 r = add_state_check_num(reg, ckn);
939 if (r) return r;
940 r = add_rel_addr(reg, tlen);
941 }
942 else {
943 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
944 }
945 if (r) return r;
946 }
947
948 r = compile_tree(qn->target, reg);
949 }
950 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
951 if (CKN_ON) {
952 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
953 if (r) return r;
954 r = add_state_check_num(reg, ckn);
955 if (r) return r;
956 r = add_rel_addr(reg, SIZE_OP_JUMP);
957 }
958 else {
959 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
960 }
961
962 if (r) return r;
963 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
964 if (r) return r;
965 r = compile_tree(qn->target, reg);
966 }
967 else {
968 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
969 if (CKN_ON) {
970 if (r) return r;
971 r = add_opcode(reg, OP_STATE_CHECK);
972 if (r) return r;
973 r = add_state_check_num(reg, ckn);
974 }
975 }
976 return r;
977}
978
979#else /* USE_COMBINATION_EXPLOSION_CHECK */
980
981static int
982compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
983{
984 int len, mod_tlen;
985 int infinite = IS_REPEAT_INFINITE(qn->upper);
986 int empty_info = qn->target_empty_info;
987 int tlen = compile_length_tree(qn->target, reg);
988
989 if (tlen < 0) return tlen;
990
991 /* anychar repeat */
992 if (NTYPE(qn->target) == NT_CANY) {
993 if (qn->greedy && infinite) {
994 if (IS_NOT_NULL(qn->next_head_exact))
995 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
996 else
997 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
998 }
999 }
1000
1001 if (empty_info != 0)
1002 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1003 else
1004 mod_tlen = tlen;
1005
1006 if (infinite &&
1007 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1008 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1009 len = SIZE_OP_JUMP;
1010 }
1011 else {
1012 len = tlen * qn->lower;
1013 }
1014
1015 if (qn->greedy) {
1016#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1017 if (IS_NOT_NULL(qn->head_exact))
1018 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1019 else
1020#endif
1021 if (IS_NOT_NULL(qn->next_head_exact))
1022 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1023 else
1024 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1025 }
1026 else
1027 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1028 }
1029 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1030 len = SIZE_OP_JUMP + tlen;
1031 }
1032 else if (!infinite && qn->greedy &&
1033 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1034 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1035 len = tlen * qn->lower;
1036 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1037 }
1038 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1039 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1040 }
1041 else {
1042 len = SIZE_OP_REPEAT_INC
1043 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1044 }
1045
1046 return len;
1047}
1048
1049static int
1050compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1051{
1052 int i, r, mod_tlen;
1053 int infinite = IS_REPEAT_INFINITE(qn->upper);
1054 int empty_info = qn->target_empty_info;
1055 int tlen = compile_length_tree(qn->target, reg);
1056
1057 if (tlen < 0) return tlen;
1058
1059 if (is_anychar_star_quantifier(qn)) {
1060 r = compile_tree_n_times(qn->target, qn->lower, reg);
1061 if (r) return r;
1062 if (IS_NOT_NULL(qn->next_head_exact)) {
1063 if (IS_MULTILINE(reg->options))
1064 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1065 else
1066 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1067 if (r) return r;
1068 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1069 }
1070 else {
1071 if (IS_MULTILINE(reg->options))
1072 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1073 else
1074 return add_opcode(reg, OP_ANYCHAR_STAR);
1075 }
1076 }
1077
1078 if (empty_info != 0)
1079 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1080 else
1081 mod_tlen = tlen;
1082
1083 if (infinite &&
1084 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1085 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1086 if (qn->greedy) {
1087#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1088 if (IS_NOT_NULL(qn->head_exact))
1089 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1090 else
1091#endif
1092 if (IS_NOT_NULL(qn->next_head_exact))
1093 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1094 else
1095 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1096 }
1097 else {
1098 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1099 }
1100 if (r) return r;
1101 }
1102 else {
1103 r = compile_tree_n_times(qn->target, qn->lower, reg);
1104 if (r) return r;
1105 }
1106
1107 if (qn->greedy) {
1108#ifdef USE_OP_PUSH_OR_JUMP_EXACT
1109 if (IS_NOT_NULL(qn->head_exact)) {
1110 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1111 mod_tlen + SIZE_OP_JUMP);
1112 if (r) return r;
1113 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1114 r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 if (r) return r;
1116 r = add_opcode_rel_addr(reg, OP_JUMP,
1117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1118 }
1119 else
1120#endif
1121 if (IS_NOT_NULL(qn->next_head_exact)) {
1122 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1123 mod_tlen + SIZE_OP_JUMP);
1124 if (r) return r;
1125 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1126 r = compile_tree_empty_check(qn->target, reg, empty_info);
1127 if (r) return r;
1128 r = add_opcode_rel_addr(reg, OP_JUMP,
1129 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1130 }
1131 else {
1132 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1133 if (r) return r;
1134 r = compile_tree_empty_check(qn->target, reg, empty_info);
1135 if (r) return r;
1136 r = add_opcode_rel_addr(reg, OP_JUMP,
1137 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1138 }
1139 }
1140 else {
1141 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1142 if (r) return r;
1143 r = compile_tree_empty_check(qn->target, reg, empty_info);
1144 if (r) return r;
1145 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1146 }
1147 }
1148 else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
1149 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1150 if (r) return r;
1151 r = compile_tree(qn->target, reg);
1152 }
1153 else if (!infinite && qn->greedy &&
1154 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1155 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1156 int n = qn->upper - qn->lower;
1157
1158 r = compile_tree_n_times(qn->target, qn->lower, reg);
1159 if (r) return r;
1160
1161 for (i = 0; i < n; i++) {
1162 r = add_opcode_rel_addr(reg, OP_PUSH,
1163 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1164 if (r) return r;
1165 r = compile_tree(qn->target, reg);
1166 if (r) return r;
1167 }
1168 }
1169 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1170 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1171 if (r) return r;
1172 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1173 if (r) return r;
1174 r = compile_tree(qn->target, reg);
1175 }
1176 else {
1177 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1178 }
1179 return r;
1180}
1181#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1182
1183static int
1184compile_length_option_node(EncloseNode* node, regex_t* reg)
1185{
1186 int tlen;
1187 OnigOptionType prev = reg->options;
1188
1189 reg->options = node->option;
1190 tlen = compile_length_tree(node->target, reg);
1191 reg->options = prev;
1192
1193 if (tlen < 0) return tlen;
1194
1195 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1196 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1197 + tlen + SIZE_OP_SET_OPTION;
1198 }
1199 else
1200 return tlen;
1201}
1202
1203static int
1204compile_option_node(EncloseNode* node, regex_t* reg)
1205{
1206 int r;
1207 OnigOptionType prev = reg->options;
1208
1209 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1210 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1211 if (r) return r;
1212 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213 if (r) return r;
1214 r = add_opcode(reg, OP_FAIL);
1215 if (r) return r;
1216 }
1217
1218 reg->options = node->option;
1219 r = compile_tree(node->target, reg);
1220 reg->options = prev;
1221
1222 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1223 if (r) return r;
1224 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1225 }
1226 return r;
1227}
1228
1229static int
1230compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1231{
1232 int len;
1233 int tlen;
1234
1235 if (node->type == ENCLOSE_OPTION)
1236 return compile_length_option_node(node, reg);
1237
1238 if (node->target) {
1239 tlen = compile_length_tree(node->target, reg);
1240 if (tlen < 0) return tlen;
1241 }
1242 else
1243 tlen = 0;
1244
1245 switch (node->type) {
1246 case ENCLOSE_MEMORY:
1247#ifdef USE_SUBEXP_CALL
1248 if (IS_ENCLOSE_CALLED(node)) {
1249 len = SIZE_OP_MEMORY_START_PUSH + tlen
1250 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1251 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1252 len += (IS_ENCLOSE_RECURSION(node)
1253 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1254 else
1255 len += (IS_ENCLOSE_RECURSION(node)
1256 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1257 }
1258 else if (IS_ENCLOSE_RECURSION(node)) {
1259 len = SIZE_OP_MEMORY_START_PUSH;
1260 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1261 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1262 }
1263 else
1264#endif
1265 {
1266 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1267 len = SIZE_OP_MEMORY_START_PUSH;
1268 else
1269 len = SIZE_OP_MEMORY_START;
1270
1271 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1272 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1273 }
1274 break;
1275
1276 case ENCLOSE_STOP_BACKTRACK:
1277 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1278 QtfrNode* qn = NQTFR(node->target);
1279 tlen = compile_length_tree(qn->target, reg);
1280 if (tlen < 0) return tlen;
1281
1282 len = tlen * qn->lower
1283 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1284 }
1285 else {
1286 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1287 }
1288 break;
1289
1290 case ENCLOSE_CONDITION:
1291 len = SIZE_OP_CONDITION;
1292 if (NTYPE(node->target) == NT_ALT) {
1293 Node* x = node->target;
1294
1295 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1296 if (tlen < 0) return tlen;
1297 len += tlen + SIZE_OP_JUMP;
1298 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1299 x = NCDR(x);
1300 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1301 if (tlen < 0) return tlen;
1302 len += tlen;
1303 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1304 }
1305 else {
1306 return ONIGERR_PARSER_BUG;
1307 }
1308 break;
1309
1310 case ENCLOSE_ABSENT:
1311 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1312 break;
1313
1314 default:
1315 return ONIGERR_TYPE_BUG;
1316 break;
1317 }
1318
1319 return len;
1320}
1321
1322static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1323
1324static int
1325compile_enclose_node(EncloseNode* node, regex_t* reg)
1326{
1327 int r, len;
1328
1329 if (node->type == ENCLOSE_OPTION)
1330 return compile_option_node(node, reg);
1331
1332 switch (node->type) {
1333 case ENCLOSE_MEMORY:
1334#ifdef USE_SUBEXP_CALL
1335 if (IS_ENCLOSE_CALLED(node)) {
1336 r = add_opcode(reg, OP_CALL);
1337 if (r) return r;
1338 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1339 node->state |= NST_ADDR_FIXED;
1340 r = add_abs_addr(reg, (int )node->call_addr);
1341 if (r) return r;
1342 len = compile_length_tree(node->target, reg);
1343 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1344 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1345 len += (IS_ENCLOSE_RECURSION(node)
1346 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1347 else
1348 len += (IS_ENCLOSE_RECURSION(node)
1349 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1350
1351 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1352 if (r) return r;
1353 }
1354#endif
1355 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1356 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1357 else
1358 r = add_opcode(reg, OP_MEMORY_START);
1359 if (r) return r;
1360 r = add_mem_num(reg, node->regnum);
1361 if (r) return r;
1362 r = compile_tree(node->target, reg);
1363 if (r) return r;
1364#ifdef USE_SUBEXP_CALL
1365 if (IS_ENCLOSE_CALLED(node)) {
1366 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1367 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1368 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1369 else
1370 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1371 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1372
1373 if (r) return r;
1374 r = add_mem_num(reg, node->regnum);
1375 if (r) return r;
1376 r = add_opcode(reg, OP_RETURN);
1377 }
1378 else if (IS_ENCLOSE_RECURSION(node)) {
1379 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1380 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1381 else
1382 r = add_opcode(reg, OP_MEMORY_END_REC);
1383 if (r) return r;
1384 r = add_mem_num(reg, node->regnum);
1385 }
1386 else
1387#endif
1388 {
1389 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1390 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1391 else
1392 r = add_opcode(reg, OP_MEMORY_END);
1393 if (r) return r;
1394 r = add_mem_num(reg, node->regnum);
1395 }
1396 break;
1397
1398 case ENCLOSE_STOP_BACKTRACK:
1399 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1400 QtfrNode* qn = NQTFR(node->target);
1401 r = compile_tree_n_times(qn->target, qn->lower, reg);
1402 if (r) return r;
1403
1404 len = compile_length_tree(qn->target, reg);
1405 if (len < 0) return len;
1406
1407 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1408 if (r) return r;
1409 r = compile_tree(qn->target, reg);
1410 if (r) return r;
1411 r = add_opcode(reg, OP_POP);
1412 if (r) return r;
1413 r = add_opcode_rel_addr(reg, OP_JUMP,
1414 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1415 }
1416 else {
1417 r = add_opcode(reg, OP_PUSH_STOP_BT);
1418 if (r) return r;
1419 r = compile_tree(node->target, reg);
1420 if (r) return r;
1421 r = add_opcode(reg, OP_POP_STOP_BT);
1422 }
1423 break;
1424
1425 case ENCLOSE_CONDITION:
1426 r = add_opcode(reg, OP_CONDITION);
1427 if (r) return r;
1428 r = add_mem_num(reg, node->regnum);
1429 if (r) return r;
1430
1431 if (NTYPE(node->target) == NT_ALT) {
1432 Node* x = node->target;
1433 int len2;
1434
1435 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1436 if (len < 0) return len;
1437 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1438 x = NCDR(x);
1439 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1440 if (len2 < 0) return len2;
1441 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1442
1443 x = node->target;
1444 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1445 if (r) return r;
1446 r = compile_tree(NCAR(x), reg); /* yes-node */
1447 if (r) return r;
1448 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1449 if (r) return r;
1450 x = NCDR(x);
1451 r = compile_tree(NCAR(x), reg); /* no-node */
1452 }
1453 else {
1454 return ONIGERR_PARSER_BUG;
1455 }
1456 break;
1457
1458 case ENCLOSE_ABSENT:
1459 len = compile_length_tree(node->target, reg);
1460 if (len < 0) return len;
1461
1462 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1463 if (r) return r;
1464 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1465 if (r) return r;
1466 r = compile_tree(node->target, reg);
1467 if (r) return r;
1468 r = add_opcode(reg, OP_ABSENT_END);
1469 break;
1470
1471 default:
1472 return ONIGERR_TYPE_BUG;
1473 break;
1474 }
1475
1476 return r;
1477}
1478
1479static int
1480compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1481{
1482 int len;
1483 int tlen = 0;
1484
1485 if (node->target) {
1486 tlen = compile_length_tree(node->target, reg);
1487 if (tlen < 0) return tlen;
1488 }
1489
1490 switch (node->type) {
1491 case ANCHOR_PREC_READ:
1492 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1493 break;
1494 case ANCHOR_PREC_READ_NOT:
1495 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1496 break;
1497 case ANCHOR_LOOK_BEHIND:
1498 len = SIZE_OP_LOOK_BEHIND + tlen;
1499 break;
1500 case ANCHOR_LOOK_BEHIND_NOT:
1501 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1502 break;
1503
1504 default:
1505 len = SIZE_OPCODE;
1506 break;
1507 }
1508
1509 return len;
1510}
1511
1512static int
1513compile_anchor_node(AnchorNode* node, regex_t* reg)
1514{
1515 int r, len;
1516
1517 switch (node->type) {
1518 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1519 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1520 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1521 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1522 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1523 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1524
1525 case ANCHOR_WORD_BOUND:
1526 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1527 else r = add_opcode(reg, OP_WORD_BOUND);
1528 break;
1529 case ANCHOR_NOT_WORD_BOUND:
1530 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1531 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1532 break;
1533#ifdef USE_WORD_BEGIN_END
1534 case ANCHOR_WORD_BEGIN:
1535 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1536 else r = add_opcode(reg, OP_WORD_BEGIN);
1537 break;
1538 case ANCHOR_WORD_END:
1539 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1540 else r = add_opcode(reg, OP_WORD_END);
1541 break;
1542#endif
1543 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1544
1545 case ANCHOR_PREC_READ:
1546 r = add_opcode(reg, OP_PUSH_POS);
1547 if (r) return r;
1548 r = compile_tree(node->target, reg);
1549 if (r) return r;
1550 r = add_opcode(reg, OP_POP_POS);
1551 break;
1552
1553 case ANCHOR_PREC_READ_NOT:
1554 len = compile_length_tree(node->target, reg);
1555 if (len < 0) return len;
1556 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1557 if (r) return r;
1558 r = compile_tree(node->target, reg);
1559 if (r) return r;
1560 r = add_opcode(reg, OP_FAIL_POS);
1561 break;
1562
1563 case ANCHOR_LOOK_BEHIND:
1564 {
1565 int n;
1566 r = add_opcode(reg, OP_LOOK_BEHIND);
1567 if (r) return r;
1568 if (node->char_len < 0) {
1569 r = get_char_length_tree(node->target, reg, &n);
1570 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1571 }
1572 else
1573 n = node->char_len;
1574 r = add_length(reg, n);
1575 if (r) return r;
1576 r = compile_tree(node->target, reg);
1577 }
1578 break;
1579
1580 case ANCHOR_LOOK_BEHIND_NOT:
1581 {
1582 int n;
1583 len = compile_length_tree(node->target, reg);
1584 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1585 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1586 if (r) return r;
1587 if (node->char_len < 0) {
1588 r = get_char_length_tree(node->target, reg, &n);
1589 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1590 }
1591 else
1592 n = node->char_len;
1593 r = add_length(reg, n);
1594 if (r) return r;
1595 r = compile_tree(node->target, reg);
1596 if (r) return r;
1597 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1598 }
1599 break;
1600
1601 default:
1602 return ONIGERR_TYPE_BUG;
1603 break;
1604 }
1605
1606 return r;
1607}
1608
1609static int
1610compile_length_tree(Node* node, regex_t* reg)
1611{
1612 int len, type, r;
1613
1614 type = NTYPE(node);
1615 switch (type) {
1616 case NT_LIST:
1617 len = 0;
1618 do {
1619 r = compile_length_tree(NCAR(node), reg);
1620 if (r < 0) return r;
1621 len += r;
1622 } while (IS_NOT_NULL(node = NCDR(node)));
1623 r = len;
1624 break;
1625
1626 case NT_ALT:
1627 {
1628 int n = 0;
1629 len = 0;
1630 do {
1631 r = compile_length_tree(NCAR(node), reg);
1632 if (r < 0) return r;
1633 len += r;
1634 n++;
1635 } while (IS_NOT_NULL(node = NCDR(node)));
1636 r = len;
1637 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1638 }
1639 break;
1640
1641 case NT_STR:
1642 if (NSTRING_IS_RAW(node))
1643 r = compile_length_string_raw_node(NSTR(node), reg);
1644 else
1645 r = compile_length_string_node(node, reg);
1646 break;
1647
1648 case NT_CCLASS:
1649 r = compile_length_cclass_node(NCCLASS(node), reg);
1650 break;
1651
1652 case NT_CTYPE:
1653 case NT_CANY:
1654 r = SIZE_OPCODE;
1655 break;
1656
1657 case NT_BREF:
1658 {
1659 BRefNode* br = NBREF(node);
1660
1661#ifdef USE_BACKREF_WITH_LEVEL
1662 if (IS_BACKREF_NEST_LEVEL(br)) {
1663 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1664 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1665 }
1666 else
1667#endif
1668 if (br->back_num == 1) {
1669 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1670 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1671 }
1672 else {
1673 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1674 }
1675 }
1676 break;
1677
1678#ifdef USE_SUBEXP_CALL
1679 case NT_CALL:
1680 r = SIZE_OP_CALL;
1681 break;
1682#endif
1683
1684 case NT_QTFR:
1685 r = compile_length_quantifier_node(NQTFR(node), reg);
1686 break;
1687
1688 case NT_ENCLOSE:
1689 r = compile_length_enclose_node(NENCLOSE(node), reg);
1690 break;
1691
1692 case NT_ANCHOR:
1693 r = compile_length_anchor_node(NANCHOR(node), reg);
1694 break;
1695
1696 default:
1697 return ONIGERR_TYPE_BUG;
1698 break;
1699 }
1700
1701 return r;
1702}
1703
1704static int
1705compile_tree(Node* node, regex_t* reg)
1706{
1707 int n, type, len, pos, r = 0;
1708
1709 type = NTYPE(node);
1710 switch (type) {
1711 case NT_LIST:
1712 do {
1713 r = compile_tree(NCAR(node), reg);
1714 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1715 break;
1716
1717 case NT_ALT:
1718 {
1719 Node* x = node;
1720 len = 0;
1721 do {
1722 len += compile_length_tree(NCAR(x), reg);
1723 if (NCDR(x) != NULL) {
1724 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1725 }
1726 } while (IS_NOT_NULL(x = NCDR(x)));
1727 pos = reg->used + len; /* goal position */
1728
1729 do {
1730 len = compile_length_tree(NCAR(node), reg);
1731 if (IS_NOT_NULL(NCDR(node))) {
1732 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1733 if (r) break;
1734 }
1735 r = compile_tree(NCAR(node), reg);
1736 if (r) break;
1737 if (IS_NOT_NULL(NCDR(node))) {
1738 len = pos - (reg->used + SIZE_OP_JUMP);
1739 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1740 if (r) break;
1741 }
1742 } while (IS_NOT_NULL(node = NCDR(node)));
1743 }
1744 break;
1745
1746 case NT_STR:
1747 if (NSTRING_IS_RAW(node))
1748 r = compile_string_raw_node(NSTR(node), reg);
1749 else
1750 r = compile_string_node(node, reg);
1751 break;
1752
1753 case NT_CCLASS:
1754 r = compile_cclass_node(NCCLASS(node), reg);
1755 break;
1756
1757 case NT_CTYPE:
1758 {
1759 int op;
1760
1761 switch (NCTYPE(node)->ctype) {
1762 case ONIGENC_CTYPE_WORD:
1763 if (NCTYPE(node)->ascii_range != 0) {
1764 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1765 else op = OP_ASCII_WORD;
1766 }
1767 else {
1768 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1769 else op = OP_WORD;
1770 }
1771 break;
1772 default:
1773 return ONIGERR_TYPE_BUG;
1774 break;
1775 }
1776 r = add_opcode(reg, op);
1777 }
1778 break;
1779
1780 case NT_CANY:
1781 if (IS_MULTILINE(reg->options))
1782 r = add_opcode(reg, OP_ANYCHAR_ML);
1783 else
1784 r = add_opcode(reg, OP_ANYCHAR);
1785 break;
1786
1787 case NT_BREF:
1788 {
1789 BRefNode* br = NBREF(node);
1790
1791#ifdef USE_BACKREF_WITH_LEVEL
1792 if (IS_BACKREF_NEST_LEVEL(br)) {
1793 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1794 if (r) return r;
1795 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1796 if (r) return r;
1797 r = add_length(reg, br->nest_level);
1798 if (r) return r;
1799
1800 goto add_bacref_mems;
1801 }
1802 else
1803#endif
1804 if (br->back_num == 1) {
1805 n = br->back_static[0];
1806 if (IS_IGNORECASE(reg->options)) {
1807 r = add_opcode(reg, OP_BACKREFN_IC);
1808 if (r) return r;
1809 r = add_mem_num(reg, n);
1810 }
1811 else {
1812 switch (n) {
1813 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1814 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1815 default:
1816 r = add_opcode(reg, OP_BACKREFN);
1817 if (r) return r;
1818 r = add_mem_num(reg, n);
1819 break;
1820 }
1821 }
1822 }
1823 else {
1824 int i;
1825 int* p;
1826
1827 if (IS_IGNORECASE(reg->options)) {
1828 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1829 }
1830 else {
1831 r = add_opcode(reg, OP_BACKREF_MULTI);
1832 }
1833 if (r) return r;
1834
1835#ifdef USE_BACKREF_WITH_LEVEL
1836 add_bacref_mems:
1837#endif
1838 r = add_length(reg, br->back_num);
1839 if (r) return r;
1840 p = BACKREFS_P(br);
1841 for (i = br->back_num - 1; i >= 0; i--) {
1842 r = add_mem_num(reg, p[i]);
1843 if (r) return r;
1844 }
1845 }
1846 }
1847 break;
1848
1849#ifdef USE_SUBEXP_CALL
1850 case NT_CALL:
1851 r = compile_call(NCALL(node), reg);
1852 break;
1853#endif
1854
1855 case NT_QTFR:
1856 r = compile_quantifier_node(NQTFR(node), reg);
1857 break;
1858
1859 case NT_ENCLOSE:
1860 r = compile_enclose_node(NENCLOSE(node), reg);
1861 break;
1862
1863 case NT_ANCHOR:
1864 r = compile_anchor_node(NANCHOR(node), reg);
1865 break;
1866
1867 default:
1868#ifdef ONIG_DEBUG
1869 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1870#endif
1871 break;
1872 }
1873
1874 return r;
1875}
1876
1877#ifdef USE_NAMED_GROUP
1878
1879static int
1880noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1881{
1882 int r = 0;
1883 Node* node = *plink;
1884
1885 switch (NTYPE(node)) {
1886 case NT_LIST:
1887 case NT_ALT:
1888 do {
1889 r = noname_disable_map(&(NCAR(node)), map, counter);
1890 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1891 break;
1892
1893 case NT_QTFR:
1894 {
1895 Node** ptarget = &(NQTFR(node)->target);
1896 Node* old = *ptarget;
1897 r = noname_disable_map(ptarget, map, counter);
1898 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1899 onig_reduce_nested_quantifier(node, *ptarget);
1900 }
1901 }
1902 break;
1903
1904 case NT_ENCLOSE:
1905 {
1906 EncloseNode* en = NENCLOSE(node);
1907 if (en->type == ENCLOSE_MEMORY) {
1908 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1909 (*counter)++;
1910 map[en->regnum].new_val = *counter;
1911 en->regnum = *counter;
1912 }
1913 else if (en->regnum != 0) {
1914 *plink = en->target;
1915 en->target = NULL_NODE;
1916 onig_node_free(node);
1917 r = noname_disable_map(plink, map, counter);
1918 break;
1919 }
1920 }
1921 r = noname_disable_map(&(en->target), map, counter);
1922 }
1923 break;
1924
1925 case NT_ANCHOR:
1926 if (NANCHOR(node)->target)
1927 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
1928 break;
1929
1930 default:
1931 break;
1932 }
1933
1934 return r;
1935}
1936
1937static int
1938renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
1939{
1940 int i, pos, n, old_num;
1941 int *backs;
1942 BRefNode* bn = NBREF(node);
1943
1944 if (! IS_BACKREF_NAME_REF(bn))
1945 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1946
1947 old_num = bn->back_num;
1948 if (IS_NULL(bn->back_dynamic))
1949 backs = bn->back_static;
1950 else
1951 backs = bn->back_dynamic;
1952
1953 for (i = 0, pos = 0; i < old_num; i++) {
1954 if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
1955 n = map[backs[i]].new_val;
1956 if (n > 0) {
1957 backs[pos] = n;
1958 pos++;
1959 }
1960 }
1961
1962 bn->back_num = pos;
1963 return 0;
1964}
1965
1966static int
1967renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
1968{
1969 int r = 0;
1970
1971 switch (NTYPE(node)) {
1972 case NT_LIST:
1973 case NT_ALT:
1974 do {
1975 r = renumber_by_map(NCAR(node), map, num_mem);
1976 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1977 break;
1978 case NT_QTFR:
1979 r = renumber_by_map(NQTFR(node)->target, map, num_mem);
1980 break;
1981 case NT_ENCLOSE:
1982 {
1983 EncloseNode* en = NENCLOSE(node);
1984 if (en->type == ENCLOSE_CONDITION) {
1985 if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
1986 en->regnum = map[en->regnum].new_val;
1987 }
1988 r = renumber_by_map(en->target, map, num_mem);
1989 }
1990 break;
1991
1992 case NT_BREF:
1993 r = renumber_node_backref(node, map, num_mem);
1994 break;
1995
1996 case NT_ANCHOR:
1997 if (NANCHOR(node)->target)
1998 r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
1999 break;
2000
2001 default:
2002 break;
2003 }
2004
2005 return r;
2006}
2007
2008static int
2009numbered_ref_check(Node* node)
2010{
2011 int r = 0;
2012
2013 switch (NTYPE(node)) {
2014 case NT_LIST:
2015 case NT_ALT:
2016 do {
2017 r = numbered_ref_check(NCAR(node));
2018 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2019 break;
2020 case NT_QTFR:
2021 r = numbered_ref_check(NQTFR(node)->target);
2022 break;
2023 case NT_ENCLOSE:
2024 r = numbered_ref_check(NENCLOSE(node)->target);
2025 break;
2026
2027 case NT_BREF:
2028 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2029 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2030 break;
2031
2032 case NT_ANCHOR:
2033 if (NANCHOR(node)->target)
2034 r = numbered_ref_check(NANCHOR(node)->target);
2035 break;
2036
2037 default:
2038 break;
2039 }
2040
2041 return r;
2042}
2043
2044static int
2045disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2046{
2047 int r, i, pos, counter;
2048 BitStatusType loc;
2049 GroupNumRemap* map;
2050
2051 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2052 CHECK_NULL_RETURN_MEMERR(map);
2053 for (i = 1; i <= env->num_mem; i++) {
2054 map[i].new_val = 0;
2055 }
2056 counter = 0;
2057 r = noname_disable_map(root, map, &counter);
2058 if (r != 0) return r;
2059
2060 r = renumber_by_map(*root, map, env->num_mem);
2061 if (r != 0) return r;
2062
2063 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2064 if (map[i].new_val > 0) {
2065 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2066 pos++;
2067 }
2068 }
2069
2070 loc = env->capture_history;
2071 BIT_STATUS_CLEAR(env->capture_history);
2072 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2073 if (BIT_STATUS_AT(loc, i)) {
2074 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2075 }
2076 }
2077
2078 env->num_mem = env->num_named;
2079 reg->num_mem = env->num_named;
2080
2081 return onig_renumber_name_table(reg, map);
2082}
2083#endif /* USE_NAMED_GROUP */
2084
2085#ifdef USE_SUBEXP_CALL
2086static int
2087unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2088{
2089 int i, offset;
2090 EncloseNode* en;
2091 AbsAddrType addr;
2092
2093 for (i = 0; i < uslist->num; i++) {
2094 en = NENCLOSE(uslist->us[i].target);
2095 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2096 addr = en->call_addr;
2097 offset = uslist->us[i].offset;
2098
2099 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2100 }
2101 return 0;
2102}
2103#endif
2104
2105#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2106static int
2107quantifiers_memory_node_info(Node* node)
2108{
2109 int r = 0;
2110
2111 switch (NTYPE(node)) {
2112 case NT_LIST:
2113 case NT_ALT:
2114 {
2115 int v;
2116 do {
2117 v = quantifiers_memory_node_info(NCAR(node));
2118 if (v > r) r = v;
2119 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2120 }
2121 break;
2122
2123# ifdef USE_SUBEXP_CALL
2124 case NT_CALL:
2125 if (IS_CALL_RECURSION(NCALL(node))) {
2126 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2127 }
2128 else
2129 r = quantifiers_memory_node_info(NCALL(node)->target);
2130 break;
2131# endif
2132
2133 case NT_QTFR:
2134 {
2135 QtfrNode* qn = NQTFR(node);
2136 if (qn->upper != 0) {
2137 r = quantifiers_memory_node_info(qn->target);
2138 }
2139 }
2140 break;
2141
2142 case NT_ENCLOSE:
2143 {
2144 EncloseNode* en = NENCLOSE(node);
2145 switch (en->type) {
2146 case ENCLOSE_MEMORY:
2147 return NQ_TARGET_IS_EMPTY_MEM;
2148 break;
2149
2150 case ENCLOSE_OPTION:
2151 case ENCLOSE_STOP_BACKTRACK:
2152 case ENCLOSE_CONDITION:
2153 case ENCLOSE_ABSENT:
2154 r = quantifiers_memory_node_info(en->target);
2155 break;
2156 default:
2157 break;
2158 }
2159 }
2160 break;
2161
2162 case NT_BREF:
2163 case NT_STR:
2164 case NT_CTYPE:
2165 case NT_CCLASS:
2166 case NT_CANY:
2167 case NT_ANCHOR:
2168 default:
2169 break;
2170 }
2171
2172 return r;
2173}
2174#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2175
2176static int
2177get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2178{
2179 OnigDistance tmin;
2180 int r = 0;
2181
2182 *min = 0;
2183 switch (NTYPE(node)) {
2184 case NT_BREF:
2185 {
2186 int i;
2187 int* backs;
2188 Node** nodes = SCANENV_MEM_NODES(env);
2189 BRefNode* br = NBREF(node);
2190 if (br->state & NST_RECURSION) break;
2191
2192 backs = BACKREFS_P(br);
2193 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2194 r = get_min_match_length(nodes[backs[0]], min, env);
2195 if (r != 0) break;
2196 for (i = 1; i < br->back_num; i++) {
2197 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2198 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2199 if (r != 0) break;
2200 if (*min > tmin) *min = tmin;
2201 }
2202 }
2203 break;
2204
2205#ifdef USE_SUBEXP_CALL
2206 case NT_CALL:
2207 if (IS_CALL_RECURSION(NCALL(node))) {
2208 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2209 if (IS_ENCLOSE_MIN_FIXED(en))
2210 *min = en->min_len;
2211 }
2212 else
2213 r = get_min_match_length(NCALL(node)->target, min, env);
2214 break;
2215#endif
2216
2217 case NT_LIST:
2218 do {
2219 r = get_min_match_length(NCAR(node), &tmin, env);
2220 if (r == 0) *min += tmin;
2221 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2222 break;
2223
2224 case NT_ALT:
2225 {
2226 Node *x, *y;
2227 y = node;
2228 do {
2229 x = NCAR(y);
2230 r = get_min_match_length(x, &tmin, env);
2231 if (r != 0) break;
2232 if (y == node) *min = tmin;
2233 else if (*min > tmin) *min = tmin;
2234 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2235 }
2236 break;
2237
2238 case NT_STR:
2239 {
2240 StrNode* sn = NSTR(node);
2241 *min = sn->end - sn->s;
2242 }
2243 break;
2244
2245 case NT_CTYPE:
2246 *min = 1;
2247 break;
2248
2249 case NT_CCLASS:
2250 case NT_CANY:
2251 *min = 1;
2252 break;
2253
2254 case NT_QTFR:
2255 {
2256 QtfrNode* qn = NQTFR(node);
2257
2258 if (qn->lower > 0) {
2259 r = get_min_match_length(qn->target, min, env);
2260 if (r == 0)
2261 *min = distance_multiply(*min, qn->lower);
2262 }
2263 }
2264 break;
2265
2266 case NT_ENCLOSE:
2267 {
2268 EncloseNode* en = NENCLOSE(node);
2269 switch (en->type) {
2270 case ENCLOSE_MEMORY:
2271 if (IS_ENCLOSE_MIN_FIXED(en))
2272 *min = en->min_len;
2273 else {
2274 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2275 *min = 0; /* recursive */
2276 else {
2277 SET_ENCLOSE_STATUS(node, NST_MARK1);
2278 r = get_min_match_length(en->target, min, env);
2279 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2280 if (r == 0) {
2281 en->min_len = *min;
2282 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2283 }
2284 }
2285 }
2286 break;
2287
2288 case ENCLOSE_OPTION:
2289 case ENCLOSE_STOP_BACKTRACK:
2290 case ENCLOSE_CONDITION:
2291 r = get_min_match_length(en->target, min, env);
2292 break;
2293
2294 case ENCLOSE_ABSENT:
2295 break;
2296 }
2297 }
2298 break;
2299
2300 case NT_ANCHOR:
2301 default:
2302 break;
2303 }
2304
2305 return r;
2306}
2307
2308static int
2309get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2310{
2311 OnigDistance tmax;
2312 int r = 0;
2313
2314 *max = 0;
2315 switch (NTYPE(node)) {
2316 case NT_LIST:
2317 do {
2318 r = get_max_match_length(NCAR(node), &tmax, env);
2319 if (r == 0)
2320 *max = distance_add(*max, tmax);
2321 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2322 break;
2323
2324 case NT_ALT:
2325 do {
2326 r = get_max_match_length(NCAR(node), &tmax, env);
2327 if (r == 0 && *max < tmax) *max = tmax;
2328 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2329 break;
2330
2331 case NT_STR:
2332 {
2333 StrNode* sn = NSTR(node);
2334 *max = sn->end - sn->s;
2335 }
2336 break;
2337
2338 case NT_CTYPE:
2339 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2340 break;
2341
2342 case NT_CCLASS:
2343 case NT_CANY:
2344 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2345 break;
2346
2347 case NT_BREF:
2348 {
2349 int i;
2350 int* backs;
2351 Node** nodes = SCANENV_MEM_NODES(env);
2352 BRefNode* br = NBREF(node);
2353 if (br->state & NST_RECURSION) {
2354 *max = ONIG_INFINITE_DISTANCE;
2355 break;
2356 }
2357 backs = BACKREFS_P(br);
2358 for (i = 0; i < br->back_num; i++) {
2359 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2360 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2361 if (r != 0) break;
2362 if (*max < tmax) *max = tmax;
2363 }
2364 }
2365 break;
2366
2367#ifdef USE_SUBEXP_CALL
2368 case NT_CALL:
2369 if (! IS_CALL_RECURSION(NCALL(node)))
2370 r = get_max_match_length(NCALL(node)->target, max, env);
2371 else
2372 *max = ONIG_INFINITE_DISTANCE;
2373 break;
2374#endif
2375
2376 case NT_QTFR:
2377 {
2378 QtfrNode* qn = NQTFR(node);
2379
2380 if (qn->upper != 0) {
2381 r = get_max_match_length(qn->target, max, env);
2382 if (r == 0 && *max != 0) {
2383 if (! IS_REPEAT_INFINITE(qn->upper))
2384 *max = distance_multiply(*max, qn->upper);
2385 else
2386 *max = ONIG_INFINITE_DISTANCE;
2387 }
2388 }
2389 }
2390 break;
2391
2392 case NT_ENCLOSE:
2393 {
2394 EncloseNode* en = NENCLOSE(node);
2395 switch (en->type) {
2396 case ENCLOSE_MEMORY:
2397 if (IS_ENCLOSE_MAX_FIXED(en))
2398 *max = en->max_len;
2399 else {
2400 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2401 *max = ONIG_INFINITE_DISTANCE;
2402 else {
2403 SET_ENCLOSE_STATUS(node, NST_MARK1);
2404 r = get_max_match_length(en->target, max, env);
2405 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2406 if (r == 0) {
2407 en->max_len = *max;
2408 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2409 }
2410 }
2411 }
2412 break;
2413
2414 case ENCLOSE_OPTION:
2415 case ENCLOSE_STOP_BACKTRACK:
2416 case ENCLOSE_CONDITION:
2417 r = get_max_match_length(en->target, max, env);
2418 break;
2419
2420 case ENCLOSE_ABSENT:
2421 break;
2422 }
2423 }
2424 break;
2425
2426 case NT_ANCHOR:
2427 default:
2428 break;
2429 }
2430
2431 return r;
2432}
2433
2434#define GET_CHAR_LEN_VARLEN -1
2435#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2436
2437/* fixed size pattern node only */
2438static int
2439get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2440{
2441 int tlen;
2442 int r = 0;
2443
2444 level++;
2445 *len = 0;
2446 switch (NTYPE(node)) {
2447 case NT_LIST:
2448 do {
2449 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2450 if (r == 0)
2451 *len = (int )distance_add(*len, tlen);
2452 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2453 break;
2454
2455 case NT_ALT:
2456 {
2457 int tlen2;
2458 int varlen = 0;
2459
2460 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2461 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2462 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2463 if (r == 0) {
2464 if (tlen != tlen2)
2465 varlen = 1;
2466 }
2467 }
2468 if (r == 0) {
2469 if (varlen != 0) {
2470 if (level == 1)
2471 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2472 else
2473 r = GET_CHAR_LEN_VARLEN;
2474 }
2475 else
2476 *len = tlen;
2477 }
2478 }
2479 break;
2480
2481 case NT_STR:
2482 {
2483 StrNode* sn = NSTR(node);
2484 UChar *s = sn->s;
2485 while (s < sn->end) {
2486 s += enclen(reg->enc, s, sn->end);
2487 (*len)++;
2488 }
2489 }
2490 break;
2491
2492 case NT_QTFR:
2493 {
2494 QtfrNode* qn = NQTFR(node);
2495 if (qn->lower == qn->upper) {
2496 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2497 if (r == 0)
2498 *len = (int )distance_multiply(tlen, qn->lower);
2499 }
2500 else
2501 r = GET_CHAR_LEN_VARLEN;
2502 }
2503 break;
2504
2505#ifdef USE_SUBEXP_CALL
2506 case NT_CALL:
2507 if (! IS_CALL_RECURSION(NCALL(node)))
2508 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2509 else
2510 r = GET_CHAR_LEN_VARLEN;
2511 break;
2512#endif
2513
2514 case NT_CTYPE:
2515 *len = 1;
2516 break;
2517
2518 case NT_CCLASS:
2519 case NT_CANY:
2520 *len = 1;
2521 break;
2522
2523 case NT_ENCLOSE:
2524 {
2525 EncloseNode* en = NENCLOSE(node);
2526 switch (en->type) {
2527 case ENCLOSE_MEMORY:
2528#ifdef USE_SUBEXP_CALL
2529 if (IS_ENCLOSE_CLEN_FIXED(en))
2530 *len = en->char_len;
2531 else {
2532 r = get_char_length_tree1(en->target, reg, len, level);
2533 if (r == 0) {
2534 en->char_len = *len;
2535 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2536 }
2537 }
2538 break;
2539#endif
2540 case ENCLOSE_OPTION:
2541 case ENCLOSE_STOP_BACKTRACK:
2542 case ENCLOSE_CONDITION:
2543 r = get_char_length_tree1(en->target, reg, len, level);
2544 break;
2545 case ENCLOSE_ABSENT:
2546 default:
2547 break;
2548 }
2549 }
2550 break;
2551
2552 case NT_ANCHOR:
2553 break;
2554
2555 default:
2556 r = GET_CHAR_LEN_VARLEN;
2557 break;
2558 }
2559
2560 return r;
2561}
2562
2563static int
2564get_char_length_tree(Node* node, regex_t* reg, int* len)
2565{
2566 return get_char_length_tree1(node, reg, len, 0);
2567}
2568
2569/* x is not included y ==> 1 : 0 */
2570static int
2571is_not_included(Node* x, Node* y, regex_t* reg)
2572{
2573 int i;
2574 OnigDistance len;
2575 OnigCodePoint code;
2576 UChar *p;
2577 int ytype;
2578
2579 retry:
2580 ytype = NTYPE(y);
2581 switch (NTYPE(x)) {
2582 case NT_CTYPE:
2583 {
2584 switch (ytype) {
2585 case NT_CTYPE:
2586 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2587 NCTYPE(y)->not != NCTYPE(x)->not &&
2588 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2589 return 1;
2590 else
2591 return 0;
2592 break;
2593
2594 case NT_CCLASS:
2595 swap:
2596 {
2597 Node* tmp;
2598 tmp = x; x = y; y = tmp;
2599 goto retry;
2600 }
2601 break;
2602
2603 case NT_STR:
2604 goto swap;
2605 break;
2606
2607 default:
2608 break;
2609 }
2610 }
2611 break;
2612
2613 case NT_CCLASS:
2614 {
2615 CClassNode* xc = NCCLASS(x);
2616 switch (ytype) {
2617 case NT_CTYPE:
2618 switch (NCTYPE(y)->ctype) {
2619 case ONIGENC_CTYPE_WORD:
2620 if (NCTYPE(y)->not == 0) {
2621 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2622 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2623 if (BITSET_AT(xc->bs, i)) {
2624 if (NCTYPE(y)->ascii_range) {
2625 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2626 }
2627 else {
2628 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2629 }
2630 }
2631 }
2632 return 1;
2633 }
2634 return 0;
2635 }
2636 else {
2637 if (IS_NOT_NULL(xc->mbuf)) return 0;
2638 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2639 int is_word;
2640 if (NCTYPE(y)->ascii_range)
2641 is_word = IS_CODE_SB_WORD(reg->enc, i);
2642 else
2643 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2644 if (! is_word) {
2645 if (!IS_NCCLASS_NOT(xc)) {
2646 if (BITSET_AT(xc->bs, i))
2647 return 0;
2648 }
2649 else {
2650 if (! BITSET_AT(xc->bs, i))
2651 return 0;
2652 }
2653 }
2654 }
2655 return 1;
2656 }
2657 break;
2658
2659 default:
2660 break;
2661 }
2662 break;
2663
2664 case NT_CCLASS:
2665 {
2666 int v;
2667 CClassNode* yc = NCCLASS(y);
2668
2669 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2670 v = BITSET_AT(xc->bs, i);
2671 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2672 (v == 0 && IS_NCCLASS_NOT(xc))) {
2673 v = BITSET_AT(yc->bs, i);
2674 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2675 (v == 0 && IS_NCCLASS_NOT(yc)))
2676 return 0;
2677 }
2678 }
2679 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2680 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2681 return 1;
2682 return 0;
2683 }
2684 break;
2685
2686 case NT_STR:
2687 goto swap;
2688 break;
2689
2690 default:
2691 break;
2692 }
2693 }
2694 break;
2695
2696 case NT_STR:
2697 {
2698 StrNode* xs = NSTR(x);
2699 if (NSTRING_LEN(x) == 0)
2700 break;
2701
2702 switch (ytype) {
2703 case NT_CTYPE:
2704 switch (NCTYPE(y)->ctype) {
2705 case ONIGENC_CTYPE_WORD:
2706 if (NCTYPE(y)->ascii_range) {
2707 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2708 return NCTYPE(y)->not;
2709 else
2710 return !(NCTYPE(y)->not);
2711 }
2712 else {
2713 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2714 return NCTYPE(y)->not;
2715 else
2716 return !(NCTYPE(y)->not);
2717 }
2718 break;
2719 default:
2720 break;
2721 }
2722 break;
2723
2724 case NT_CCLASS:
2725 {
2726 CClassNode* cc = NCCLASS(y);
2727
2728 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2729 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2730 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2731 }
2732 break;
2733
2734 case NT_STR:
2735 {
2736 UChar *q;
2737 StrNode* ys = NSTR(y);
2738 len = NSTRING_LEN(x);
2739 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2740 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2741 /* tiny version */
2742 return 0;
2743 }
2744 else {
2745 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2746 if (*p != *q) return 1;
2747 }
2748 }
2749 }
2750 break;
2751
2752 default:
2753 break;
2754 }
2755 }
2756 break;
2757
2758 default:
2759 break;
2760 }
2761
2762 return 0;
2763}
2764
2765static Node*
2766get_head_value_node(Node* node, int exact, regex_t* reg)
2767{
2768 Node* n = NULL_NODE;
2769
2770 switch (NTYPE(node)) {
2771 case NT_BREF:
2772 case NT_ALT:
2773 case NT_CANY:
2774#ifdef USE_SUBEXP_CALL
2775 case NT_CALL:
2776#endif
2777 break;
2778
2779 case NT_CTYPE:
2780 case NT_CCLASS:
2781 if (exact == 0) {
2782 n = node;
2783 }
2784 break;
2785
2786 case NT_LIST:
2787 n = get_head_value_node(NCAR(node), exact, reg);
2788 break;
2789
2790 case NT_STR:
2791 {
2792 StrNode* sn = NSTR(node);
2793
2794 if (sn->end <= sn->s)
2795 break;
2796
2797 if (exact != 0 &&
2798 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2799 }
2800 else {
2801 n = node;
2802 }
2803 }
2804 break;
2805
2806 case NT_QTFR:
2807 {
2808 QtfrNode* qn = NQTFR(node);
2809 if (qn->lower > 0) {
2810#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2811 if (IS_NOT_NULL(qn->head_exact))
2812 n = qn->head_exact;
2813 else
2814#endif
2815 n = get_head_value_node(qn->target, exact, reg);
2816 }
2817 }
2818 break;
2819
2820 case NT_ENCLOSE:
2821 {
2822 EncloseNode* en = NENCLOSE(node);
2823 switch (en->type) {
2824 case ENCLOSE_OPTION:
2825 {
2826 OnigOptionType options = reg->options;
2827
2828 reg->options = NENCLOSE(node)->option;
2829 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2830 reg->options = options;
2831 }
2832 break;
2833
2834 case ENCLOSE_MEMORY:
2835 case ENCLOSE_STOP_BACKTRACK:
2836 case ENCLOSE_CONDITION:
2837 n = get_head_value_node(en->target, exact, reg);
2838 break;
2839
2840 case ENCLOSE_ABSENT:
2841 break;
2842 }
2843 }
2844 break;
2845
2846 case NT_ANCHOR:
2847 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2848 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2849 break;
2850
2851 default:
2852 break;
2853 }
2854
2855 return n;
2856}
2857
2858static int
2859check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2860{
2861 int type, r = 0;
2862
2863 type = NTYPE(node);
2864 if ((NTYPE2BIT(type) & type_mask) == 0)
2865 return 1;
2866
2867 switch (type) {
2868 case NT_LIST:
2869 case NT_ALT:
2870 do {
2871 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2872 anchor_mask);
2873 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2874 break;
2875
2876 case NT_QTFR:
2877 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2878 anchor_mask);
2879 break;
2880
2881 case NT_ENCLOSE:
2882 {
2883 EncloseNode* en = NENCLOSE(node);
2884 if ((en->type & enclose_mask) == 0)
2885 return 1;
2886
2887 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2888 }
2889 break;
2890
2891 case NT_ANCHOR:
2892 type = NANCHOR(node)->type;
2893 if ((type & anchor_mask) == 0)
2894 return 1;
2895
2896 if (NANCHOR(node)->target)
2897 r = check_type_tree(NANCHOR(node)->target,
2898 type_mask, enclose_mask, anchor_mask);
2899 break;
2900
2901 default:
2902 break;
2903 }
2904 return r;
2905}
2906
2907#ifdef USE_SUBEXP_CALL
2908
2909# define RECURSION_EXIST 1
2910# define RECURSION_INFINITE 2
2911
2912static int
2913subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2914{
2915 int type;
2916 int r = 0;
2917
2918 type = NTYPE(node);
2919 switch (type) {
2920 case NT_LIST:
2921 {
2922 Node *x;
2923 OnigDistance min;
2924 int ret;
2925
2926 x = node;
2927 do {
2928 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2929 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2930 r |= ret;
2931 if (head) {
2932 ret = get_min_match_length(NCAR(x), &min, env);
2933 if (ret != 0) return ret;
2934 if (min != 0) head = 0;
2935 }
2936 } while (IS_NOT_NULL(x = NCDR(x)));
2937 }
2938 break;
2939
2940 case NT_ALT:
2941 {
2942 int ret;
2943 r = RECURSION_EXIST;
2944 do {
2945 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2946 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2947 r &= ret;
2948 } while (IS_NOT_NULL(node = NCDR(node)));
2949 }
2950 break;
2951
2952 case NT_QTFR:
2953 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2954 if (r == RECURSION_EXIST) {
2955 if (NQTFR(node)->lower == 0) r = 0;
2956 }
2957 break;
2958
2959 case NT_ANCHOR:
2960 {
2961 AnchorNode* an = NANCHOR(node);
2962 switch (an->type) {
2963 case ANCHOR_PREC_READ:
2964 case ANCHOR_PREC_READ_NOT:
2965 case ANCHOR_LOOK_BEHIND:
2966 case ANCHOR_LOOK_BEHIND_NOT:
2967 r = subexp_inf_recursive_check(an->target, env, head);
2968 break;
2969 }
2970 }
2971 break;
2972
2973 case NT_CALL:
2974 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2975 break;
2976
2977 case NT_ENCLOSE:
2978 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2979 return 0;
2980 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2981 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2982 else {
2983 SET_ENCLOSE_STATUS(node, NST_MARK2);
2984 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2985 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2986 }
2987 break;
2988
2989 default:
2990 break;
2991 }
2992
2993 return r;
2994}
2995
2996static int
2997subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2998{
2999 int type;
3000 int r = 0;
3001
3002 type = NTYPE(node);
3003 switch (type) {
3004 case NT_LIST:
3005 case NT_ALT:
3006 do {
3007 r = subexp_inf_recursive_check_trav(NCAR(node), env);
3008 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3009 break;
3010
3011 case NT_QTFR:
3012 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
3013 break;
3014
3015 case NT_ANCHOR:
3016 {
3017 AnchorNode* an = NANCHOR(node);
3018 switch (an->type) {
3019 case ANCHOR_PREC_READ:
3020 case ANCHOR_PREC_READ_NOT:
3021 case ANCHOR_LOOK_BEHIND:
3022 case ANCHOR_LOOK_BEHIND_NOT:
3023 r = subexp_inf_recursive_check_trav(an->target, env);
3024 break;
3025 }
3026 }
3027 break;
3028
3029 case NT_ENCLOSE:
3030 {
3031 EncloseNode* en = NENCLOSE(node);
3032
3033 if (IS_ENCLOSE_RECURSION(en)) {
3034 SET_ENCLOSE_STATUS(node, NST_MARK1);
3035 r = subexp_inf_recursive_check(en->target, env, 1);
3036 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3037 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3038 }
3039 r = subexp_inf_recursive_check_trav(en->target, env);
3040 }
3041
3042 break;
3043
3044 default:
3045 break;
3046 }
3047
3048 return r;
3049}
3050
3051static int
3052subexp_recursive_check(Node* node)
3053{
3054 int r = 0;
3055
3056 switch (NTYPE(node)) {
3057 case NT_LIST:
3058 case NT_ALT:
3059 do {
3060 r |= subexp_recursive_check(NCAR(node));
3061 } while (IS_NOT_NULL(node = NCDR(node)));
3062 break;
3063
3064 case NT_QTFR:
3065 r = subexp_recursive_check(NQTFR(node)->target);
3066 break;
3067
3068 case NT_ANCHOR:
3069 {
3070 AnchorNode* an = NANCHOR(node);
3071 switch (an->type) {
3072 case ANCHOR_PREC_READ:
3073 case ANCHOR_PREC_READ_NOT:
3074 case ANCHOR_LOOK_BEHIND:
3075 case ANCHOR_LOOK_BEHIND_NOT:
3076 r = subexp_recursive_check(an->target);
3077 break;
3078 }
3079 }
3080 break;
3081
3082 case NT_CALL:
3083 r = subexp_recursive_check(NCALL(node)->target);
3084 if (r != 0) SET_CALL_RECURSION(node);
3085 break;
3086
3087 case NT_ENCLOSE:
3088 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3089 return 0;
3090 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3091 return 1; /* recursion */
3092 else {
3093 SET_ENCLOSE_STATUS(node, NST_MARK2);
3094 r = subexp_recursive_check(NENCLOSE(node)->target);
3095 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3096 }
3097 break;
3098
3099 default:
3100 break;
3101 }
3102
3103 return r;
3104}
3105
3106
3107static int
3108subexp_recursive_check_trav(Node* node, ScanEnv* env)
3109{
3110# define FOUND_CALLED_NODE 1
3111
3112 int type;
3113 int r = 0;
3114
3115 type = NTYPE(node);
3116 switch (type) {
3117 case NT_LIST:
3118 case NT_ALT:
3119 {
3120 int ret;
3121 do {
3122 ret = subexp_recursive_check_trav(NCAR(node), env);
3123 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3124 else if (ret < 0) return ret;
3125 } while (IS_NOT_NULL(node = NCDR(node)));
3126 }
3127 break;
3128
3129 case NT_QTFR:
3130 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3131 if (NQTFR(node)->upper == 0) {
3132 if (r == FOUND_CALLED_NODE)
3133 NQTFR(node)->is_referred = 1;
3134 }
3135 break;
3136
3137 case NT_ANCHOR:
3138 {
3139 AnchorNode* an = NANCHOR(node);
3140 switch (an->type) {
3141 case ANCHOR_PREC_READ:
3142 case ANCHOR_PREC_READ_NOT:
3143 case ANCHOR_LOOK_BEHIND:
3144 case ANCHOR_LOOK_BEHIND_NOT:
3145 r = subexp_recursive_check_trav(an->target, env);
3146 break;
3147 }
3148 }
3149 break;
3150
3151 case NT_ENCLOSE:
3152 {
3153 EncloseNode* en = NENCLOSE(node);
3154
3155 if (! IS_ENCLOSE_RECURSION(en)) {
3156 if (IS_ENCLOSE_CALLED(en)) {
3157 SET_ENCLOSE_STATUS(node, NST_MARK1);
3158 r = subexp_recursive_check(en->target);
3159 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3160 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3161 }
3162 }
3163 r = subexp_recursive_check_trav(en->target, env);
3164 if (IS_ENCLOSE_CALLED(en))
3165 r |= FOUND_CALLED_NODE;
3166 }
3167 break;
3168
3169 default:
3170 break;
3171 }
3172
3173 return r;
3174}
3175
3176static int
3177setup_subexp_call(Node* node, ScanEnv* env)
3178{
3179 int type;
3180 int r = 0;
3181
3182 type = NTYPE(node);
3183 switch (type) {
3184 case NT_LIST:
3185 do {
3186 r = setup_subexp_call(NCAR(node), env);
3187 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3188 break;
3189
3190 case NT_ALT:
3191 do {
3192 r = setup_subexp_call(NCAR(node), env);
3193 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3194 break;
3195
3196 case NT_QTFR:
3197 r = setup_subexp_call(NQTFR(node)->target, env);
3198 break;
3199 case NT_ENCLOSE:
3200 r = setup_subexp_call(NENCLOSE(node)->target, env);
3201 break;
3202
3203 case NT_CALL:
3204 {
3205 CallNode* cn = NCALL(node);
3206 Node** nodes = SCANENV_MEM_NODES(env);
3207
3208 if (cn->group_num != 0) {
3209 int gnum = cn->group_num;
3210
3211# ifdef USE_NAMED_GROUP
3212 if (env->num_named > 0 &&
3213 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3214 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3215 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3216 }
3217# endif
3218 if (gnum > env->num_mem) {
3219 onig_scan_env_set_error_string(env,
3220 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3221 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3222 }
3223
3224# ifdef USE_NAMED_GROUP
3225 set_call_attr:
3226# endif
3227 cn->target = nodes[cn->group_num];
3228 if (IS_NULL(cn->target)) {
3229 onig_scan_env_set_error_string(env,
3230 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3231 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3232 }
3233 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3234 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3235 cn->unset_addr_list = env->unset_addr_list;
3236 }
3237# ifdef USE_NAMED_GROUP
3238# ifdef USE_PERL_SUBEXP_CALL
3239 else if (cn->name == cn->name_end) {
3240 goto set_call_attr;
3241 }
3242# endif
3243 else {
3244 int *refs;
3245
3246 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3247 &refs);
3248 if (n <= 0) {
3249 onig_scan_env_set_error_string(env,
3250 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3251 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3252 }
3253 else if (n > 1 &&
3254 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3255 onig_scan_env_set_error_string(env,
3256 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3257 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3258 }
3259 else {
3260 cn->group_num = refs[0];
3261 goto set_call_attr;
3262 }
3263 }
3264# endif
3265 }
3266 break;
3267
3268 case NT_ANCHOR:
3269 {
3270 AnchorNode* an = NANCHOR(node);
3271
3272 switch (an->type) {
3273 case ANCHOR_PREC_READ:
3274 case ANCHOR_PREC_READ_NOT:
3275 case ANCHOR_LOOK_BEHIND:
3276 case ANCHOR_LOOK_BEHIND_NOT:
3277 r = setup_subexp_call(an->target, env);
3278 break;
3279 }
3280 }
3281 break;
3282
3283 default:
3284 break;
3285 }
3286
3287 return r;
3288}
3289#endif
3290
3291/* divide different length alternatives in look-behind.
3292 (?<=A|B) ==> (?<=A)|(?<=B)
3293 (?<!A|B) ==> (?<!A)(?<!B)
3294*/
3295static int
3296divide_look_behind_alternatives(Node* node)
3297{
3298 Node *head, *np, *insert_node;
3299 AnchorNode* an = NANCHOR(node);
3300 int anc_type = an->type;
3301
3302 head = an->target;
3303 np = NCAR(head);
3304 swap_node(node, head);
3305 NCAR(node) = head;
3306 NANCHOR(head)->target = np;
3307
3308 np = node;
3309 while ((np = NCDR(np)) != NULL_NODE) {
3310 insert_node = onig_node_new_anchor(anc_type);
3311 CHECK_NULL_RETURN_MEMERR(insert_node);
3312 NANCHOR(insert_node)->target = NCAR(np);
3313 NCAR(np) = insert_node;
3314 }
3315
3316 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3317 np = node;
3318 do {
3319 SET_NTYPE(np, NT_LIST); /* alt -> list */
3320 } while ((np = NCDR(np)) != NULL_NODE);
3321 }
3322 return 0;
3323}
3324
3325static int
3326setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3327{
3328 int r, len;
3329 AnchorNode* an = NANCHOR(node);
3330
3331 r = get_char_length_tree(an->target, reg, &len);
3332 if (r == 0)
3333 an->char_len = len;
3334 else if (r == GET_CHAR_LEN_VARLEN)
3335 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3336 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3337 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3338 r = divide_look_behind_alternatives(node);
3339 else
3340 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3341 }
3342
3343 return r;
3344}
3345
3346static int
3347next_setup(Node* node, Node* next_node, regex_t* reg)
3348{
3349 int type;
3350
3351 retry:
3352 type = NTYPE(node);
3353 if (type == NT_QTFR) {
3354 QtfrNode* qn = NQTFR(node);
3355 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3356#ifdef USE_QTFR_PEEK_NEXT
3357 Node* n = get_head_value_node(next_node, 1, reg);
3358 /* '\0': for UTF-16BE etc... */
3359 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3360 qn->next_head_exact = n;
3361 }
3362#endif
3363 /* automatic possessification a*b ==> (?>a*)b */
3364 if (qn->lower <= 1) {
3365 int ttype = NTYPE(qn->target);
3366 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3367 Node *x, *y;
3368 x = get_head_value_node(qn->target, 0, reg);
3369 if (IS_NOT_NULL(x)) {
3370 y = get_head_value_node(next_node, 0, reg);
3371 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3372 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3373 CHECK_NULL_RETURN_MEMERR(en);
3374 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3375 swap_node(node, en);
3376 NENCLOSE(node)->target = en;
3377 }
3378 }
3379 }
3380 }
3381 }
3382 }
3383 else if (type == NT_ENCLOSE) {
3384 EncloseNode* en = NENCLOSE(node);
3385 if (en->type == ENCLOSE_MEMORY) {
3386 node = en->target;
3387 goto retry;
3388 }
3389 }
3390 return 0;
3391}
3392
3393
3394static int
3395update_string_node_case_fold(regex_t* reg, Node *node)
3396{
3397 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3398 UChar *sbuf, *ebuf, *sp;
3399 int r, i, len;
3400 OnigDistance sbuf_size;
3401 StrNode* sn = NSTR(node);
3402
3403 end = sn->end;
3404 sbuf_size = (end - sn->s) * 2;
3405 sbuf = (UChar* )xmalloc(sbuf_size);
3406 CHECK_NULL_RETURN_MEMERR(sbuf);
3407 ebuf = sbuf + sbuf_size;
3408
3409 sp = sbuf;
3410 p = sn->s;
3411 while (p < end) {
3412 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3413 for (i = 0; i < len; i++) {
3414 if (sp >= ebuf) {
3415 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3416 if (IS_NULL(p)) {
3417 xfree(sbuf);
3418 return ONIGERR_MEMORY;
3419 }
3420 sbuf = p;
3421 sp = sbuf + sbuf_size;
3422 sbuf_size *= 2;
3423 ebuf = sbuf + sbuf_size;
3424 }
3425
3426 *sp++ = buf[i];
3427 }
3428 }
3429
3430 r = onig_node_str_set(node, sbuf, sp);
3431
3432 xfree(sbuf);
3433 return r;
3434}
3435
3436static int
3437expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3438 regex_t* reg)
3439{
3440 int r;
3441 Node *node;
3442
3443 node = onig_node_new_str(s, end);
3444 if (IS_NULL(node)) return ONIGERR_MEMORY;
3445
3446 r = update_string_node_case_fold(reg, node);
3447 if (r != 0) {
3448 onig_node_free(node);
3449 return r;
3450 }
3451
3452 NSTRING_SET_AMBIG(node);
3453 NSTRING_SET_DONT_GET_OPT_INFO(node);
3454 *rnode = node;
3455 return 0;
3456}
3457
3458static int
3459is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3460 int slen)
3461{
3462 int i;
3463
3464 for (i = 0; i < item_num; i++) {
3465 if (items[i].byte_len != slen) {
3466 return 1;
3467 }
3468 if (items[i].code_len != 1) {
3469 return 1;
3470 }
3471 }
3472 return 0;
3473}
3474
3475static int
3476expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3477 UChar *p, int slen, UChar *end,
3478 regex_t* reg, Node **rnode)
3479{
3480 int r, i, j, len, varlen;
3481 Node *anode, *var_anode, *snode, *xnode, *an;
3482 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3483
3484 *rnode = var_anode = NULL_NODE;
3485
3486 varlen = 0;
3487 for (i = 0; i < item_num; i++) {
3488 if (items[i].byte_len != slen) {
3489 varlen = 1;
3490 break;
3491 }
3492 }
3493
3494 if (varlen != 0) {
3495 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3496 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3497
3498 xnode = onig_node_new_list(NULL, NULL);
3499 if (IS_NULL(xnode)) goto mem_err;
3500 NCAR(var_anode) = xnode;
3501
3502 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3503 if (IS_NULL(anode)) goto mem_err;
3504 NCAR(xnode) = anode;
3505 }
3506 else {
3507 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3508 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3509 }
3510
3511 snode = onig_node_new_str(p, p + slen);
3512 if (IS_NULL(snode)) goto mem_err;
3513
3514 NCAR(anode) = snode;
3515
3516 for (i = 0; i < item_num; i++) {
3517 snode = onig_node_new_str(NULL, NULL);
3518 if (IS_NULL(snode)) goto mem_err;
3519
3520 for (j = 0; j < items[i].code_len; j++) {
3521 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3522 if (len < 0) {
3523 r = len;
3524 goto mem_err2;
3525 }
3526
3527 r = onig_node_str_cat(snode, buf, buf + len);
3528 if (r != 0) goto mem_err2;
3529 }
3530
3531 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3532 if (IS_NULL(an)) {
3533 goto mem_err2;
3534 }
3535
3536 if (items[i].byte_len != slen) {
3537 Node *rem;
3538 UChar *q = p + items[i].byte_len;
3539
3540 if (q < end) {
3541 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3542 if (r != 0) {
3543 onig_node_free(an);
3544 goto mem_err2;
3545 }
3546
3547 xnode = onig_node_list_add(NULL_NODE, snode);
3548 if (IS_NULL(xnode)) {
3549 onig_node_free(an);
3550 onig_node_free(rem);
3551 goto mem_err2;
3552 }
3553 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3554 onig_node_free(an);
3555 onig_node_free(xnode);
3556 onig_node_free(rem);
3557 goto mem_err;
3558 }
3559
3560 NCAR(an) = xnode;
3561 }
3562 else {
3563 NCAR(an) = snode;
3564 }
3565
3566 NCDR(var_anode) = an;
3567 var_anode = an;
3568 }
3569 else {
3570 NCAR(an) = snode;
3571 NCDR(anode) = an;
3572 anode = an;
3573 }
3574 }
3575
3576 return varlen;
3577
3578 mem_err2:
3579 onig_node_free(snode);
3580
3581 mem_err:
3582 onig_node_free(*rnode);
3583
3584 return ONIGERR_MEMORY;
3585}
3586
3587static int
3588expand_case_fold_string(Node* node, regex_t* reg)
3589{
3590#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3591
3592 int r, n, len, alt_num;
3593 int varlen = 0;
3594 UChar *start, *end, *p;
3595 Node *top_root, *root, *snode, *prev_node;
3596 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3597 StrNode* sn = NSTR(node);
3598
3599 if (NSTRING_IS_AMBIG(node)) return 0;
3600
3601 start = sn->s;
3602 end = sn->end;
3603 if (start >= end) return 0;
3604
3605 r = 0;
3606 top_root = root = prev_node = snode = NULL_NODE;
3607 alt_num = 1;
3608 p = start;
3609 while (p < end) {
3610 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3611 p, end, items);
3612 if (n < 0) {
3613 r = n;
3614 goto err;
3615 }
3616
3617 len = enclen(reg->enc, p, end);
3618
3619 varlen = is_case_fold_variable_len(n, items, len);
3620 if (n == 0 || varlen == 0) {
3621 if (IS_NULL(snode)) {
3622 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3623 onig_node_free(top_root);
3624 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3625 if (IS_NULL(root)) {
3626 onig_node_free(prev_node);
3627 goto mem_err;
3628 }
3629 }
3630
3631 prev_node = snode = onig_node_new_str(NULL, NULL);
3632 if (IS_NULL(snode)) goto mem_err;
3633 if (IS_NOT_NULL(root)) {
3634 if (IS_NULL(onig_node_list_add(root, snode))) {
3635 onig_node_free(snode);
3636 goto mem_err;
3637 }
3638 }
3639 }
3640
3641 r = onig_node_str_cat(snode, p, p + len);
3642 if (r != 0) goto err;
3643 }
3644 else {
3645 alt_num *= (n + 1);
3646 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3647
3648 if (IS_NOT_NULL(snode)) {
3649 r = update_string_node_case_fold(reg, snode);
3650 if (r == 0) {
3651 NSTRING_SET_AMBIG(snode);
3652 }
3653 }
3654 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3655 onig_node_free(top_root);
3656 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3657 if (IS_NULL(root)) {
3658 onig_node_free(prev_node);
3659 goto mem_err;
3660 }
3661 }
3662
3663 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3664 if (r < 0) goto mem_err;
3665 if (r == 1) {
3666 if (IS_NULL(root)) {
3667 top_root = prev_node;
3668 }
3669 else {
3670 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3671 onig_node_free(prev_node);
3672 goto mem_err;
3673 }
3674 }
3675
3676 root = NCAR(prev_node);
3677 }
3678 else { /* r == 0 */
3679 if (IS_NOT_NULL(root)) {
3680 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3681 onig_node_free(prev_node);
3682 goto mem_err;
3683 }
3684 }
3685 }
3686
3687 snode = NULL_NODE;
3688 }
3689
3690 p += len;
3691 }
3692 if (IS_NOT_NULL(snode)) {
3693 r = update_string_node_case_fold(reg, snode);
3694 if (r == 0) {
3695 NSTRING_SET_AMBIG(snode);
3696 }
3697 }
3698
3699 if (p < end) {
3700 Node *srem;
3701
3702 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3703 if (r != 0) goto mem_err;
3704
3705 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3706 onig_node_free(top_root);
3707 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3708 if (IS_NULL(root)) {
3709 onig_node_free(srem);
3710 onig_node_free(prev_node);
3711 goto mem_err;
3712 }
3713 }
3714
3715 if (IS_NULL(root)) {
3716 prev_node = srem;
3717 }
3718 else {
3719 if (IS_NULL(onig_node_list_add(root, srem))) {
3720 onig_node_free(srem);
3721 goto mem_err;
3722 }
3723 }
3724 }
3725
3726 /* ending */
3727 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3728 swap_node(node, top_root);
3729 onig_node_free(top_root);
3730 return 0;
3731
3732 mem_err:
3733 r = ONIGERR_MEMORY;
3734
3735 err:
3736 onig_node_free(top_root);
3737 return r;
3738}
3739
3740
3741#ifdef USE_COMBINATION_EXPLOSION_CHECK
3742
3743# define CEC_THRES_NUM_BIG_REPEAT 512
3744# define CEC_INFINITE_NUM 0x7fffffff
3745
3746# define CEC_IN_INFINITE_REPEAT (1<<0)
3747# define CEC_IN_FINITE_REPEAT (1<<1)
3748# define CEC_CONT_BIG_REPEAT (1<<2)
3749
3750static int
3751setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3752{
3753 int type;
3754 int r = state;
3755
3756 type = NTYPE(node);
3757 switch (type) {
3758 case NT_LIST:
3759 {
3760 do {
3761 r = setup_comb_exp_check(NCAR(node), r, env);
3762 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3763 }
3764 break;
3765
3766 case NT_ALT:
3767 {
3768 int ret;
3769 do {
3770 ret = setup_comb_exp_check(NCAR(node), state, env);
3771 r |= ret;
3772 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3773 }
3774 break;
3775
3776 case NT_QTFR:
3777 {
3778 int child_state = state;
3779 int add_state = 0;
3780 QtfrNode* qn = NQTFR(node);
3781 Node* target = qn->target;
3782 int var_num;
3783
3784 if (! IS_REPEAT_INFINITE(qn->upper)) {
3785 if (qn->upper > 1) {
3786 /* {0,1}, {1,1} are allowed */
3787 child_state |= CEC_IN_FINITE_REPEAT;
3788
3789 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3790 if (env->backrefed_mem == 0) {
3791 if (NTYPE(qn->target) == NT_ENCLOSE) {
3792 EncloseNode* en = NENCLOSE(qn->target);
3793 if (en->type == ENCLOSE_MEMORY) {
3794 if (NTYPE(en->target) == NT_QTFR) {
3795 QtfrNode* q = NQTFR(en->target);
3796 if (IS_REPEAT_INFINITE(q->upper)
3797 && q->greedy == qn->greedy) {
3798 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3799 if (qn->upper == 1)
3800 child_state = state;
3801 }
3802 }
3803 }
3804 }
3805 }
3806 }
3807 }
3808
3809 if (state & CEC_IN_FINITE_REPEAT) {
3810 qn->comb_exp_check_num = -1;
3811 }
3812 else {
3813 if (IS_REPEAT_INFINITE(qn->upper)) {
3814 var_num = CEC_INFINITE_NUM;
3815 child_state |= CEC_IN_INFINITE_REPEAT;
3816 }
3817 else {
3818 var_num = qn->upper - qn->lower;
3819 }
3820
3821 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3822 add_state |= CEC_CONT_BIG_REPEAT;
3823
3824 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3825 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3826 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3827 if (qn->comb_exp_check_num == 0) {
3828 env->num_comb_exp_check++;
3829 qn->comb_exp_check_num = env->num_comb_exp_check;
3830 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3831 env->comb_exp_max_regnum = env->curr_max_regnum;
3832 }
3833 }
3834 }
3835
3836 r = setup_comb_exp_check(target, child_state, env);
3837 r |= add_state;
3838 }
3839 break;
3840
3841 case NT_ENCLOSE:
3842 {
3843 EncloseNode* en = NENCLOSE(node);
3844
3845 switch (en->type) {
3846 case ENCLOSE_MEMORY:
3847 {
3848 if (env->curr_max_regnum < en->regnum)
3849 env->curr_max_regnum = en->regnum;
3850
3851 r = setup_comb_exp_check(en->target, state, env);
3852 }
3853 break;
3854
3855 default:
3856 r = setup_comb_exp_check(en->target, state, env);
3857 break;
3858 }
3859 }
3860 break;
3861
3862# ifdef USE_SUBEXP_CALL
3863 case NT_CALL:
3864 if (IS_CALL_RECURSION(NCALL(node)))
3865 env->has_recursion = 1;
3866 else
3867 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3868 break;
3869# endif
3870
3871 default:
3872 break;
3873 }
3874
3875 return r;
3876}
3877#endif
3878
3879#define IN_ALT (1<<0)
3880#define IN_NOT (1<<1)
3881#define IN_REPEAT (1<<2)
3882#define IN_VAR_REPEAT (1<<3)
3883#define IN_CALL (1<<4)
3884#define IN_RECCALL (1<<5)
3885
3886/* setup_tree does the following work.
3887 1. check empty loop. (set qn->target_empty_info)
3888 2. expand ignore-case in char class.
3889 3. set memory status bit flags. (reg->mem_stats)
3890 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3891 5. find invalid patterns in look-behind.
3892 6. expand repeated string.
3893 */
3894static int
3895setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3896{
3897 int type;
3898 int r = 0;
3899
3900restart:
3901 type = NTYPE(node);
3902 switch (type) {
3903 case NT_LIST:
3904 {
3905 Node* prev = NULL_NODE;
3906 do {
3907 r = setup_tree(NCAR(node), reg, state, env);
3908 if (IS_NOT_NULL(prev) && r == 0) {
3909 r = next_setup(prev, NCAR(node), reg);
3910 }
3911 prev = NCAR(node);
3912 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3913 }
3914 break;
3915
3916 case NT_ALT:
3917 do {
3918 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3919 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3920 break;
3921
3922 case NT_CCLASS:
3923 break;
3924
3925 case NT_STR:
3926 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3927 r = expand_case_fold_string(node, reg);
3928 }
3929 break;
3930
3931 case NT_CTYPE:
3932 case NT_CANY:
3933 break;
3934
3935#ifdef USE_SUBEXP_CALL
3936 case NT_CALL:
3937 break;
3938#endif
3939
3940 case NT_BREF:
3941 {
3942 int i;
3943 int* p;
3944 Node** nodes = SCANENV_MEM_NODES(env);
3945 BRefNode* br = NBREF(node);
3946 p = BACKREFS_P(br);
3947 for (i = 0; i < br->back_num; i++) {
3948 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3949 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3950 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3951#ifdef USE_BACKREF_WITH_LEVEL
3952 if (IS_BACKREF_NEST_LEVEL(br)) {
3953 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3954 }
3955#endif
3956 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3957 }
3958 }
3959 break;
3960
3961 case NT_QTFR:
3962 {
3963 OnigDistance d;
3964 QtfrNode* qn = NQTFR(node);
3965 Node* target = qn->target;
3966
3967 if ((state & IN_REPEAT) != 0) {
3968 qn->state |= NST_IN_REPEAT;
3969 }
3970
3971 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3972 r = get_min_match_length(target, &d, env);
3973 if (r) break;
3974 if (d == 0) {
3975 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3976#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3977 r = quantifiers_memory_node_info(target);
3978 if (r < 0) break;
3979 if (r > 0) {
3980 qn->target_empty_info = r;
3981 }
3982#endif
3983#if 0
3984 r = get_max_match_length(target, &d, env);
3985 if (r == 0 && d == 0) {
3986 /* ()* ==> ()?, ()+ ==> () */
3987 qn->upper = 1;
3988 if (qn->lower > 1) qn->lower = 1;
3989 if (NTYPE(target) == NT_STR) {
3990 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3991 }
3992 }
3993#endif
3994 }
3995 }
3996
3997 state |= IN_REPEAT;
3998 if (qn->lower != qn->upper)
3999 state |= IN_VAR_REPEAT;
4000 r = setup_tree(target, reg, state, env);
4001 if (r) break;
4002
4003 /* expand string */
4004#define EXPAND_STRING_MAX_LENGTH 100
4005 if (NTYPE(target) == NT_STR) {
4006 if (qn->lower > 1) {
4007 int i, n = qn->lower;
4008 OnigDistance len = NSTRING_LEN(target);
4009 StrNode* sn = NSTR(target);
4010 Node* np;
4011
4012 np = onig_node_new_str(sn->s, sn->end);
4013 if (IS_NULL(np)) return ONIGERR_MEMORY;
4014 NSTR(np)->flag = sn->flag;
4015
4016 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
4017 r = onig_node_str_cat(np, sn->s, sn->end);
4018 if (r) {
4019 onig_node_free(np);
4020 return r;
4021 }
4022 }
4023 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4024 Node *np1, *np2;
4025
4026 qn->lower -= i;
4027 if (! IS_REPEAT_INFINITE(qn->upper))
4028 qn->upper -= i;
4029
4030 np1 = onig_node_new_list(np, NULL);
4031 if (IS_NULL(np1)) {
4032 onig_node_free(np);
4033 return ONIGERR_MEMORY;
4034 }
4035 swap_node(np1, node);
4036 np2 = onig_node_list_add(node, np1);
4037 if (IS_NULL(np2)) {
4038 onig_node_free(np1);
4039 return ONIGERR_MEMORY;
4040 }
4041 }
4042 else {
4043 swap_node(np, node);
4044 onig_node_free(np);
4045 }
4046 break; /* break case NT_QTFR: */
4047 }
4048 }
4049
4050#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4051 if (qn->greedy && (qn->target_empty_info != 0)) {
4052 if (NTYPE(target) == NT_QTFR) {
4053 QtfrNode* tqn = NQTFR(target);
4054 if (IS_NOT_NULL(tqn->head_exact)) {
4055 qn->head_exact = tqn->head_exact;
4056 tqn->head_exact = NULL;
4057 }
4058 }
4059 else {
4060 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4061 }
4062 }
4063#endif
4064 }
4065 break;
4066
4067 case NT_ENCLOSE:
4068 {
4069 EncloseNode* en = NENCLOSE(node);
4070
4071 switch (en->type) {
4072 case ENCLOSE_OPTION:
4073 {
4074 OnigOptionType options = reg->options;
4075 reg->options = NENCLOSE(node)->option;
4076 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4077 reg->options = options;
4078 }
4079 break;
4080
4081 case ENCLOSE_MEMORY:
4082 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
4083 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4084 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4085 }
4086 if (IS_ENCLOSE_CALLED(en))
4087 state |= IN_CALL;
4088 if (IS_ENCLOSE_RECURSION(en))
4089 state |= IN_RECCALL;
4090 else if ((state & IN_RECCALL) != 0)
4091 SET_CALL_RECURSION(node);
4092 r = setup_tree(en->target, reg, state, env);
4093 break;
4094
4095 case ENCLOSE_STOP_BACKTRACK:
4096 {
4097 Node* target = en->target;
4098 r = setup_tree(target, reg, state, env);
4099 if (NTYPE(target) == NT_QTFR) {
4100 QtfrNode* tqn = NQTFR(target);
4101 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4102 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4103 int qtype = NTYPE(tqn->target);
4104 if (IS_NODE_TYPE_SIMPLE(qtype))
4105 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4106 }
4107 }
4108 }
4109 break;
4110
4111 case ENCLOSE_CONDITION:
4112#ifdef USE_NAMED_GROUP
4113 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4114 env->num_named > 0 &&
4115 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4116 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4117 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4118 }
4119#endif
4120 if (NENCLOSE(node)->regnum > env->num_mem)
4121 return ONIGERR_INVALID_BACKREF;
4122 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4123 break;
4124
4125 case ENCLOSE_ABSENT:
4126 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4127 break;
4128 }
4129 }
4130 break;
4131
4132 case NT_ANCHOR:
4133 {
4134 AnchorNode* an = NANCHOR(node);
4135
4136 switch (an->type) {
4137 case ANCHOR_PREC_READ:
4138 r = setup_tree(an->target, reg, state, env);
4139 break;
4140 case ANCHOR_PREC_READ_NOT:
4141 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4142 break;
4143
4144/* allowed node types in look-behind */
4145#define ALLOWED_TYPE_IN_LB \
4146 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4147 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4148
4149#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4150#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4151
4152#define ALLOWED_ANCHOR_IN_LB \
4153( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4154 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4155 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4156 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4157#define ALLOWED_ANCHOR_IN_LB_NOT \
4158( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4159 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4160 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4161 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4162
4163 case ANCHOR_LOOK_BEHIND:
4164 {
4165 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4166 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4167 if (r < 0) return r;
4168 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4169 if (NTYPE(node) != NT_ANCHOR) goto restart;
4170 r = setup_tree(an->target, reg, state, env);
4171 if (r != 0) return r;
4172 r = setup_look_behind(node, reg, env);
4173 }
4174 break;
4175
4176 case ANCHOR_LOOK_BEHIND_NOT:
4177 {
4178 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4179 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4180 if (r < 0) return r;
4181 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4182 if (NTYPE(node) != NT_ANCHOR) goto restart;
4183 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4184 if (r != 0) return r;
4185 r = setup_look_behind(node, reg, env);
4186 }
4187 break;
4188 }
4189 }
4190 break;
4191
4192 default:
4193 break;
4194 }
4195
4196 return r;
4197}
4198
4199#ifndef USE_SUNDAY_QUICK_SEARCH
4200/* set skip map for Boyer-Moore search */
4201static int
4202set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4203 UChar skip[], int** int_skip, int ignore_case)
4204{
4205 OnigDistance i, len;
4206 int clen, flen, n, j, k;
4207 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4208 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4209 OnigEncoding enc = reg->enc;
4210
4211 len = end - s;
4212 if (len < ONIG_CHAR_TABLE_SIZE) {
4213 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4214
4215 n = 0;
4216 for (i = 0; i < len - 1; i += clen) {
4217 p = s + i;
4218 if (ignore_case)
4219 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4220 p, end, items);
4221 clen = enclen(enc, p, end);
4222 if (p + clen > end)
4223 clen = (int )(end - p);
4224
4225 for (j = 0; j < n; j++) {
4226 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4227 return 1; /* different length isn't supported. */
4228 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4229 if (flen != clen)
4230 return 1; /* different length isn't supported. */
4231 }
4232 for (j = 0; j < clen; j++) {
4233 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4234 for (k = 0; k < n; k++) {
4235 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4236 }
4237 }
4238 }
4239 }
4240 else {
4241# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4242 /* This should not happen. */
4243 return ONIGERR_TYPE_BUG;
4244# else
4245 if (IS_NULL(*int_skip)) {
4246 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4247 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4248 }
4249 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4250
4251 n = 0;
4252 for (i = 0; i < len - 1; i += clen) {
4253 p = s + i;
4254 if (ignore_case)
4255 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4256 p, end, items);
4257 clen = enclen(enc, p, end);
4258 if (p + clen > end)
4259 clen = (int )(end - p);
4260
4261 for (j = 0; j < n; j++) {
4262 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4263 return 1; /* different length isn't supported. */
4264 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4265 if (flen != clen)
4266 return 1; /* different length isn't supported. */
4267 }
4268 for (j = 0; j < clen; j++) {
4269 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4270 for (k = 0; k < n; k++) {
4271 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4272 }
4273 }
4274 }
4275# endif
4276 }
4277 return 0;
4278}
4279
4280#else /* USE_SUNDAY_QUICK_SEARCH */
4281
4282/* set skip map for Sunday's quick search */
4283static int
4284set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4285 UChar skip[], int** int_skip, int ignore_case)
4286{
4287 OnigDistance i, len;
4288 int clen, flen, n, j, k;
4289 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4290 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4291 OnigEncoding enc = reg->enc;
4292
4293 len = end - s;
4294 if (len < ONIG_CHAR_TABLE_SIZE) {
4295 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4296
4297 n = 0;
4298 for (i = 0; i < len; i += clen) {
4299 p = s + i;
4300 if (ignore_case)
4301 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4302 p, end, items);
4303 clen = enclen(enc, p, end);
4304 if (p + clen > end)
4305 clen = (int )(end - p);
4306
4307 for (j = 0; j < n; j++) {
4308 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4309 return 1; /* different length isn't supported. */
4310 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4311 if (flen != clen)
4312 return 1; /* different length isn't supported. */
4313 }
4314 for (j = 0; j < clen; j++) {
4315 skip[s[i + j]] = (UChar )(len - i - j);
4316 for (k = 0; k < n; k++) {
4317 skip[buf[k][j]] = (UChar )(len - i - j);
4318 }
4319 }
4320 }
4321 }
4322 else {
4323# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4324 /* This should not happen. */
4325 return ONIGERR_TYPE_BUG;
4326# else
4327 if (IS_NULL(*int_skip)) {
4328 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4329 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4330 }
4331 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4332
4333 n = 0;
4334 for (i = 0; i < len; i += clen) {
4335 p = s + i;
4336 if (ignore_case)
4337 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4338 p, end, items);
4339 clen = enclen(enc, p, end);
4340 if (p + clen > end)
4341 clen = (int )(end - p);
4342
4343 for (j = 0; j < n; j++) {
4344 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4345 return 1; /* different length isn't supported. */
4346 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4347 if (flen != clen)
4348 return 1; /* different length isn't supported. */
4349 }
4350 for (j = 0; j < clen; j++) {
4351 (*int_skip)[s[i + j]] = (int )(len - i - j);
4352 for (k = 0; k < n; k++) {
4353 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4354 }
4355 }
4356 }
4357# endif
4358 }
4359 return 0;
4360}
4361#endif /* USE_SUNDAY_QUICK_SEARCH */
4362
4363typedef struct {
4364 OnigDistance min; /* min byte length */
4365 OnigDistance max; /* max byte length */
4366} MinMaxLen;
4367
4368typedef struct {
4369 MinMaxLen mmd;
4370 OnigEncoding enc;
4371 OnigOptionType options;
4372 OnigCaseFoldType case_fold_flag;
4373 ScanEnv* scan_env;
4374} OptEnv;
4375
4376typedef struct {
4377 int left_anchor;
4378 int right_anchor;
4379} OptAncInfo;
4380
4381typedef struct {
4382 MinMaxLen mmd; /* info position */
4383 OptAncInfo anc;
4384
4385 int reach_end;
4386 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4387 int len;
4388 UChar s[OPT_EXACT_MAXLEN];
4389} OptExactInfo;
4390
4391typedef struct {
4392 MinMaxLen mmd; /* info position */
4393 OptAncInfo anc;
4394
4395 int value; /* weighted value */
4396 UChar map[ONIG_CHAR_TABLE_SIZE];
4397} OptMapInfo;
4398
4399typedef struct {
4400 MinMaxLen len;
4401
4402 OptAncInfo anc;
4403 OptExactInfo exb; /* boundary */
4404 OptExactInfo exm; /* middle */
4405 OptExactInfo expr; /* prec read (?=...) */
4406
4407 OptMapInfo map; /* boundary */
4408} NodeOptInfo;
4409
4410
4411static int
4412map_position_value(OnigEncoding enc, int i)
4413{
4414 static const short int ByteValTable[] = {
4415 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4416 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4417 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4418 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4419 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4420 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4421 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4422 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4423 };
4424
4425 if (i < numberof(ByteValTable)) {
4426 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4427 return 20;
4428 else
4429 return (int )ByteValTable[i];
4430 }
4431 else
4432 return 4; /* Take it easy. */
4433}
4434
4435static int
4436distance_value(MinMaxLen* mm)
4437{
4438 /* 1000 / (min-max-dist + 1) */
4439 static const short int dist_vals[] = {
4440 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4441 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4442 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4443 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4444 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4445 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4446 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4447 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4448 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4449 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4450 };
4451
4452 OnigDistance d;
4453
4454 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4455
4456 d = mm->max - mm->min;
4457 if (d < numberof(dist_vals))
4458 /* return dist_vals[d] * 16 / (mm->min + 12); */
4459 return (int )dist_vals[d];
4460 else
4461 return 1;
4462}
4463
4464static int
4465comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4466{
4467 if (v2 <= 0) return -1;
4468 if (v1 <= 0) return 1;
4469
4470 v1 *= distance_value(d1);
4471 v2 *= distance_value(d2);
4472
4473 if (v2 > v1) return 1;
4474 if (v2 < v1) return -1;
4475
4476 if (d2->min < d1->min) return 1;
4477 if (d2->min > d1->min) return -1;
4478 return 0;
4479}
4480
4481static int
4482is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4483{
4484 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4485}
4486
4487
4488static void
4489set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4490{
4491 mml->min = min;
4492 mml->max = max;
4493}
4494
4495static void
4496clear_mml(MinMaxLen* mml)
4497{
4498 mml->min = mml->max = 0;
4499}
4500
4501static void
4502copy_mml(MinMaxLen* to, MinMaxLen* from)
4503{
4504 to->min = from->min;
4505 to->max = from->max;
4506}
4507
4508static void
4509add_mml(MinMaxLen* to, MinMaxLen* from)
4510{
4511 to->min = distance_add(to->min, from->min);
4512 to->max = distance_add(to->max, from->max);
4513}
4514
4515#if 0
4516static void
4517add_len_mml(MinMaxLen* to, OnigDistance len)
4518{
4519 to->min = distance_add(to->min, len);
4520 to->max = distance_add(to->max, len);
4521}
4522#endif
4523
4524static void
4525alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4526{
4527 if (to->min > from->min) to->min = from->min;
4528 if (to->max < from->max) to->max = from->max;
4529}
4530
4531static void
4532copy_opt_env(OptEnv* to, OptEnv* from)
4533{
4534 *to = *from;
4535}
4536
4537static void
4538clear_opt_anc_info(OptAncInfo* anc)
4539{
4540 anc->left_anchor = 0;
4541 anc->right_anchor = 0;
4542}
4543
4544static void
4545copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4546{
4547 *to = *from;
4548}
4549
4550static void
4551concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4552 OnigDistance left_len, OnigDistance right_len)
4553{
4554 clear_opt_anc_info(to);
4555
4556 to->left_anchor = left->left_anchor;
4557 if (left_len == 0) {
4558 to->left_anchor |= right->left_anchor;
4559 }
4560
4561 to->right_anchor = right->right_anchor;
4562 if (right_len == 0) {
4563 to->right_anchor |= left->right_anchor;
4564 }
4565 else {
4566 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4567 }
4568}
4569
4570static int
4571is_left_anchor(int anc)
4572{
4573 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4574 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4575 anc == ANCHOR_PREC_READ_NOT)
4576 return 0;
4577
4578 return 1;
4579}
4580
4581static int
4582is_set_opt_anc_info(OptAncInfo* to, int anc)
4583{
4584 if ((to->left_anchor & anc) != 0) return 1;
4585
4586 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4587}
4588
4589static void
4590add_opt_anc_info(OptAncInfo* to, int anc)
4591{
4592 if (is_left_anchor(anc))
4593 to->left_anchor |= anc;
4594 else
4595 to->right_anchor |= anc;
4596}
4597
4598static void
4599remove_opt_anc_info(OptAncInfo* to, int anc)
4600{
4601 if (is_left_anchor(anc))
4602 to->left_anchor &= ~anc;
4603 else
4604 to->right_anchor &= ~anc;
4605}
4606
4607static void
4608alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4609{
4610 to->left_anchor &= add->left_anchor;
4611 to->right_anchor &= add->right_anchor;
4612}
4613
4614static int
4615is_full_opt_exact_info(OptExactInfo* ex)
4616{
4617 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4618}
4619
4620static void
4621clear_opt_exact_info(OptExactInfo* ex)
4622{
4623 clear_mml(&ex->mmd);
4624 clear_opt_anc_info(&ex->anc);
4625 ex->reach_end = 0;
4626 ex->ignore_case = -1; /* unset */
4627 ex->len = 0;
4628 ex->s[0] = '\0';
4629}
4630
4631static void
4632copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4633{
4634 *to = *from;
4635}
4636
4637static void
4638concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4639{
4640 int i, j, len;
4641 UChar *p, *end;
4642 OptAncInfo tanc;
4643
4644 if (to->ignore_case < 0)
4645 to->ignore_case = add->ignore_case;
4646 else if (to->ignore_case != add->ignore_case)
4647 return ; /* avoid */
4648
4649 p = add->s;
4650 end = p + add->len;
4651 for (i = to->len; p < end; ) {
4652 len = enclen(enc, p, end);
4653 if (i + len > OPT_EXACT_MAXLEN) break;
4654 for (j = 0; j < len && p < end; j++)
4655 to->s[i++] = *p++;
4656 }
4657
4658 to->len = i;
4659 to->reach_end = (p == end ? add->reach_end : 0);
4660
4661 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4662 if (! to->reach_end) tanc.right_anchor = 0;
4663 copy_opt_anc_info(&to->anc, &tanc);
4664}
4665
4666static void
4667concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4668 int raw ARG_UNUSED, OnigEncoding enc)
4669{
4670 int i, j, len;
4671 UChar *p;
4672
4673 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4674 len = enclen(enc, p, end);
4675 if (i + len > OPT_EXACT_MAXLEN) break;
4676 for (j = 0; j < len && p < end; j++)
4677 to->s[i++] = *p++;
4678 }
4679
4680 to->len = i;
4681}
4682
4683static void
4684alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4685{
4686 int i, j, len;
4687
4688 if (add->len == 0 || to->len == 0) {
4689 clear_opt_exact_info(to);
4690 return ;
4691 }
4692
4693 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4694 clear_opt_exact_info(to);
4695 return ;
4696 }
4697
4698 for (i = 0; i < to->len && i < add->len; ) {
4699 if (to->s[i] != add->s[i]) break;
4700 len = enclen(env->enc, to->s + i, to->s + to->len);
4701
4702 for (j = 1; j < len; j++) {
4703 if (to->s[i+j] != add->s[i+j]) break;
4704 }
4705 if (j < len) break;
4706 i += len;
4707 }
4708
4709 if (! add->reach_end || i < add->len || i < to->len) {
4710 to->reach_end = 0;
4711 }
4712 to->len = i;
4713 if (to->ignore_case < 0)
4714 to->ignore_case = add->ignore_case;
4715 else if (add->ignore_case >= 0)
4716 to->ignore_case |= add->ignore_case;
4717
4718 alt_merge_opt_anc_info(&to->anc, &add->anc);
4719 if (! to->reach_end) to->anc.right_anchor = 0;
4720}
4721
4722static void
4723select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4724{
4725 int v1, v2;
4726
4727 v1 = now->len;
4728 v2 = alt->len;
4729
4730 if (v2 == 0) {
4731 return ;
4732 }
4733 else if (v1 == 0) {
4734 copy_opt_exact_info(now, alt);
4735 return ;
4736 }
4737 else if (v1 <= 2 && v2 <= 2) {
4738 /* ByteValTable[x] is big value --> low price */
4739 v2 = map_position_value(enc, now->s[0]);
4740 v1 = map_position_value(enc, alt->s[0]);
4741
4742 if (now->len > 1) v1 += 5;
4743 if (alt->len > 1) v2 += 5;
4744 }
4745
4746 if (now->ignore_case <= 0) v1 *= 2;
4747 if (alt->ignore_case <= 0) v2 *= 2;
4748
4749 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4750 copy_opt_exact_info(now, alt);
4751}
4752
4753static void
4754clear_opt_map_info(OptMapInfo* map)
4755{
4756 static const OptMapInfo clean_info = {
4757 {0, 0}, {0, 0}, 0,
4758 {
4759 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4760 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4761 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4762 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4763 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4764 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4765 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4766 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4767 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4768 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4769 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4770 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4771 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4772 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4773 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4774 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4775 }
4776 };
4777
4778 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4779}
4780
4781static void
4782copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4783{
4784 *to = *from;
4785}
4786
4787static void
4788add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4789{
4790 if (map->map[c] == 0) {
4791 map->map[c] = 1;
4792 map->value += map_position_value(enc, c);
4793 }
4794}
4795
4796static int
4797add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4798 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4799{
4800 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4801 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4802 int i, n;
4803
4804 add_char_opt_map_info(map, p[0], enc);
4805
4806 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4807 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4808 if (n < 0) return n;
4809
4810 for (i = 0; i < n; i++) {
4811 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4812 add_char_opt_map_info(map, buf[0], enc);
4813 }
4814
4815 return 0;
4816}
4817
4818static void
4819select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4820{
4821 const int z = 1<<15; /* 32768: something big value */
4822
4823 int v1, v2;
4824
4825 if (alt->value == 0) return ;
4826 if (now->value == 0) {
4827 copy_opt_map_info(now, alt);
4828 return ;
4829 }
4830
4831 v1 = z / now->value;
4832 v2 = z / alt->value;
4833 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4834 copy_opt_map_info(now, alt);
4835}
4836
4837static int
4838comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4839{
4840#define COMP_EM_BASE 20
4841 int ve, vm;
4842
4843 if (m->value <= 0) return -1;
4844
4845 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4846 vm = COMP_EM_BASE * 5 * 2 / m->value;
4847 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4848}
4849
4850static void
4851alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4852{
4853 int i, val;
4854
4855 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4856 if (to->value == 0) return ;
4857 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4858 clear_opt_map_info(to);
4859 return ;
4860 }
4861
4862 alt_merge_mml(&to->mmd, &add->mmd);
4863
4864 val = 0;
4865 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4866 if (add->map[i])
4867 to->map[i] = 1;
4868
4869 if (to->map[i])
4870 val += map_position_value(enc, i);
4871 }
4872 to->value = val;
4873
4874 alt_merge_opt_anc_info(&to->anc, &add->anc);
4875}
4876
4877static void
4878set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4879{
4880 copy_mml(&(opt->exb.mmd), mmd);
4881 copy_mml(&(opt->expr.mmd), mmd);
4882 copy_mml(&(opt->map.mmd), mmd);
4883}
4884
4885static void
4886clear_node_opt_info(NodeOptInfo* opt)
4887{
4888 clear_mml(&opt->len);
4889 clear_opt_anc_info(&opt->anc);
4890 clear_opt_exact_info(&opt->exb);
4891 clear_opt_exact_info(&opt->exm);
4892 clear_opt_exact_info(&opt->expr);
4893 clear_opt_map_info(&opt->map);
4894}
4895
4896static void
4897copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4898{
4899 *to = *from;
4900}
4901
4902static void
4903concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4904{
4905 int exb_reach, exm_reach;
4906 OptAncInfo tanc;
4907
4908 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4909 copy_opt_anc_info(&to->anc, &tanc);
4910
4911 if (add->exb.len > 0 && to->len.max == 0) {
4912 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4913 to->len.max, add->len.max);
4914 copy_opt_anc_info(&add->exb.anc, &tanc);
4915 }
4916
4917 if (add->map.value > 0 && to->len.max == 0) {
4918 if (add->map.mmd.max == 0)
4919 add->map.anc.left_anchor |= to->anc.left_anchor;
4920 }
4921
4922 exb_reach = to->exb.reach_end;
4923 exm_reach = to->exm.reach_end;
4924
4925 if (add->len.max != 0)
4926 to->exb.reach_end = to->exm.reach_end = 0;
4927
4928 if (add->exb.len > 0) {
4929 if (exb_reach) {
4930 concat_opt_exact_info(&to->exb, &add->exb, enc);
4931 clear_opt_exact_info(&add->exb);
4932 }
4933 else if (exm_reach) {
4934 concat_opt_exact_info(&to->exm, &add->exb, enc);
4935 clear_opt_exact_info(&add->exb);
4936 }
4937 }
4938 select_opt_exact_info(enc, &to->exm, &add->exb);
4939 select_opt_exact_info(enc, &to->exm, &add->exm);
4940
4941 if (to->expr.len > 0) {
4942 if (add->len.max > 0) {
4943 if (to->expr.len > (int )add->len.max)
4944 to->expr.len = (int )add->len.max;
4945
4946 if (to->expr.mmd.max == 0)
4947 select_opt_exact_info(enc, &to->exb, &to->expr);
4948 else
4949 select_opt_exact_info(enc, &to->exm, &to->expr);
4950 }
4951 }
4952 else if (add->expr.len > 0) {
4953 copy_opt_exact_info(&to->expr, &add->expr);
4954 }
4955
4956 select_opt_map_info(&to->map, &add->map);
4957
4958 add_mml(&to->len, &add->len);
4959}
4960
4961static void
4962alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4963{
4964 alt_merge_opt_anc_info (&to->anc, &add->anc);
4965 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4966 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4967 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4968 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4969
4970 alt_merge_mml(&to->len, &add->len);
4971}
4972
4973
4974#define MAX_NODE_OPT_INFO_REF_COUNT 5
4975
4976static int
4977optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4978{
4979 int type;
4980 int r = 0;
4981
4982 clear_node_opt_info(opt);
4983 set_bound_node_opt_info(opt, &env->mmd);
4984
4985 type = NTYPE(node);
4986 switch (type) {
4987 case NT_LIST:
4988 {
4989 OptEnv nenv;
4990 NodeOptInfo nopt;
4991 Node* nd = node;
4992
4993 copy_opt_env(&nenv, env);
4994 do {
4995 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4996 if (r == 0) {
4997 add_mml(&nenv.mmd, &nopt.len);
4998 concat_left_node_opt_info(env->enc, opt, &nopt);
4999 }
5000 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
5001 }
5002 break;
5003
5004 case NT_ALT:
5005 {
5006 NodeOptInfo nopt;
5007 Node* nd = node;
5008
5009 do {
5010 r = optimize_node_left(NCAR(nd), &nopt, env);
5011 if (r == 0) {
5012 if (nd == node) copy_node_opt_info(opt, &nopt);
5013 else alt_merge_node_opt_info(opt, &nopt, env);
5014 }
5015 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
5016 }
5017 break;
5018
5019 case NT_STR:
5020 {
5021 StrNode* sn = NSTR(node);
5022 OnigDistance slen = sn->end - sn->s;
5023 int is_raw = NSTRING_IS_RAW(node);
5024
5025 if (! NSTRING_IS_AMBIG(node)) {
5026 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5027 is_raw, env->enc);
5028 opt->exb.ignore_case = 0;
5029 if (slen > 0) {
5030 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5031 }
5032 set_mml(&opt->len, slen, slen);
5033 }
5034 else {
5035 OnigDistance max;
5036
5037 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
5038 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5039 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * (OnigDistance)n;
5040 }
5041 else {
5042 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5043 is_raw, env->enc);
5044 opt->exb.ignore_case = 1;
5045
5046 if (slen > 0) {
5047 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5048 env->enc, env->case_fold_flag);
5049 if (r != 0) break;
5050 }
5051
5052 max = slen;
5053 }
5054
5055 set_mml(&opt->len, slen, max);
5056 }
5057
5058 if ((OnigDistance )opt->exb.len == slen)
5059 opt->exb.reach_end = 1;
5060 }
5061 break;
5062
5063 case NT_CCLASS:
5064 {
5065 int i, z;
5066 CClassNode* cc = NCCLASS(node);
5067
5068 /* no need to check ignore case. (set in setup_tree()) */
5069
5070 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5071 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5072 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5073
5074 set_mml(&opt->len, min, max);
5075 }
5076 else {
5077 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5078 z = BITSET_AT(cc->bs, i);
5079 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5080 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5081 }
5082 }
5083 set_mml(&opt->len, 1, 1);
5084 }
5085 }
5086 break;
5087
5088 case NT_CTYPE:
5089 {
5090 int i, min, max;
5091 int maxcode;
5092
5093 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5094
5095 if (max == 1) {
5096 min = 1;
5097
5098 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5099 switch (NCTYPE(node)->ctype) {
5100 case ONIGENC_CTYPE_WORD:
5101 if (NCTYPE(node)->not != 0) {
5102 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5103 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5104 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5105 }
5106 }
5107 }
5108 else {
5109 for (i = 0; i < maxcode; i++) {
5110 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5111 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5112 }
5113 }
5114 }
5115 break;
5116 }
5117 }
5118 else {
5119 min = ONIGENC_MBC_MINLEN(env->enc);
5120 }
5121 set_mml(&opt->len, min, max);
5122 }
5123 break;
5124
5125 case NT_CANY:
5126 {
5127 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5128 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5129 set_mml(&opt->len, min, max);
5130 }
5131 break;
5132
5133 case NT_ANCHOR:
5134 switch (NANCHOR(node)->type) {
5135 case ANCHOR_BEGIN_BUF:
5136 case ANCHOR_BEGIN_POSITION:
5137 case ANCHOR_BEGIN_LINE:
5138 case ANCHOR_END_BUF:
5139 case ANCHOR_SEMI_END_BUF:
5140 case ANCHOR_END_LINE:
5141 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5142 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5143 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5144 break;
5145
5146 case ANCHOR_PREC_READ:
5147 {
5148 NodeOptInfo nopt;
5149
5150 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5151 if (r == 0) {
5152 if (nopt.exb.len > 0)
5153 copy_opt_exact_info(&opt->expr, &nopt.exb);
5154 else if (nopt.exm.len > 0)
5155 copy_opt_exact_info(&opt->expr, &nopt.exm);
5156
5157 opt->expr.reach_end = 0;
5158
5159 if (nopt.map.value > 0)
5160 copy_opt_map_info(&opt->map, &nopt.map);
5161 }
5162 }
5163 break;
5164
5165 case ANCHOR_LOOK_BEHIND_NOT:
5166 break;
5167 }
5168 break;
5169
5170 case NT_BREF:
5171 {
5172 int i;
5173 int* backs;
5174 OnigDistance min, max, tmin, tmax;
5175 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5176 BRefNode* br = NBREF(node);
5177
5178 if (br->state & NST_RECURSION) {
5179 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5180 break;
5181 }
5182 backs = BACKREFS_P(br);
5183 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5184 if (r != 0) break;
5185 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5186 if (r != 0) break;
5187 for (i = 1; i < br->back_num; i++) {
5188 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5189 if (r != 0) break;
5190 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5191 if (r != 0) break;
5192 if (min > tmin) min = tmin;
5193 if (max < tmax) max = tmax;
5194 }
5195 if (r == 0) set_mml(&opt->len, min, max);
5196 }
5197 break;
5198
5199#ifdef USE_SUBEXP_CALL
5200 case NT_CALL:
5201 if (IS_CALL_RECURSION(NCALL(node)))
5202 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5203 else {
5204 OnigOptionType save = env->options;
5205 env->options = NENCLOSE(NCALL(node)->target)->option;
5206 r = optimize_node_left(NCALL(node)->target, opt, env);
5207 env->options = save;
5208 }
5209 break;
5210#endif
5211
5212 case NT_QTFR:
5213 {
5214 int i;
5215 OnigDistance min, max;
5216 NodeOptInfo nopt;
5217 QtfrNode* qn = NQTFR(node);
5218
5219 r = optimize_node_left(qn->target, &nopt, env);
5220 if (r) break;
5221
5222 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5223 if (env->mmd.max == 0 &&
5224 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5225 if (IS_MULTILINE(env->options))
5226 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5227 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5228 else
5229 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5230 }
5231 }
5232 else {
5233 if (qn->lower > 0) {
5234 copy_node_opt_info(opt, &nopt);
5235 if (nopt.exb.len > 0) {
5236 if (nopt.exb.reach_end) {
5237 for (i = 2; i <= qn->lower &&
5238 ! is_full_opt_exact_info(&opt->exb); i++) {
5239 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5240 }
5241 if (i < qn->lower) {
5242 opt->exb.reach_end = 0;
5243 }
5244 }
5245 }
5246
5247 if (qn->lower != qn->upper) {
5248 opt->exb.reach_end = 0;
5249 opt->exm.reach_end = 0;
5250 }
5251 if (qn->lower > 1)
5252 opt->exm.reach_end = 0;
5253 }
5254 }
5255
5256 min = distance_multiply(nopt.len.min, qn->lower);
5257 if (IS_REPEAT_INFINITE(qn->upper))
5258 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5259 else
5260 max = distance_multiply(nopt.len.max, qn->upper);
5261
5262 set_mml(&opt->len, min, max);
5263 }
5264 break;
5265
5266 case NT_ENCLOSE:
5267 {
5268 EncloseNode* en = NENCLOSE(node);
5269
5270 switch (en->type) {
5271 case ENCLOSE_OPTION:
5272 {
5273 OnigOptionType save = env->options;
5274
5275 env->options = en->option;
5276 r = optimize_node_left(en->target, opt, env);
5277 env->options = save;
5278 }
5279 break;
5280
5281 case ENCLOSE_MEMORY:
5282#ifdef USE_SUBEXP_CALL
5283 en->opt_count++;
5284 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5285 OnigDistance min, max;
5286
5287 min = 0;
5288 max = ONIG_INFINITE_DISTANCE;
5289 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5290 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5291 set_mml(&opt->len, min, max);
5292 }
5293 else
5294#endif
5295 {
5296 r = optimize_node_left(en->target, opt, env);
5297
5298 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5299 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5300 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5301 }
5302 }
5303 break;
5304
5305 case ENCLOSE_STOP_BACKTRACK:
5306 case ENCLOSE_CONDITION:
5307 r = optimize_node_left(en->target, opt, env);
5308 break;
5309
5310 case ENCLOSE_ABSENT:
5311 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5312 break;
5313 }
5314 }
5315 break;
5316
5317 default:
5318#ifdef ONIG_DEBUG
5319 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5320 NTYPE(node));
5321#endif
5322 r = ONIGERR_TYPE_BUG;
5323 break;
5324 }
5325
5326 return r;
5327}
5328
5329static int
5330set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5331{
5332 int r;
5333 int allow_reverse;
5334
5335 if (e->len == 0) return 0;
5336
5337 reg->exact = (UChar* )xmalloc(e->len);
5338 CHECK_NULL_RETURN_MEMERR(reg->exact);
5339 xmemcpy(reg->exact, e->s, e->len);
5340 reg->exact_end = reg->exact + e->len;
5341
5342 allow_reverse =
5343 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5344
5345 if (e->ignore_case > 0) {
5346 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5347 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5348 reg->map, &(reg->int_map), 1);
5349 if (r == 0) {
5350 reg->optimize = (allow_reverse != 0
5351 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5352 }
5353 else {
5354 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5355 }
5356 }
5357 else {
5358 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5359 }
5360 }
5361 else {
5362 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5363 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5364 reg->map, &(reg->int_map), 0);
5365 if (r == 0) {
5366 reg->optimize = (allow_reverse != 0
5367 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5368 }
5369 else {
5370 reg->optimize = ONIG_OPTIMIZE_EXACT;
5371 }
5372 }
5373 else {
5374 reg->optimize = ONIG_OPTIMIZE_EXACT;
5375 }
5376 }
5377
5378 reg->dmin = e->mmd.min;
5379 reg->dmax = e->mmd.max;
5380
5381 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5382 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5383 }
5384
5385 return 0;
5386}
5387
5388static void
5389set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5390{
5391 int i;
5392
5393 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5394 reg->map[i] = m->map[i];
5395
5396 reg->optimize = ONIG_OPTIMIZE_MAP;
5397 reg->dmin = m->mmd.min;
5398 reg->dmax = m->mmd.max;
5399
5400 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5401 reg->threshold_len = (int )(reg->dmin + 1);
5402 }
5403}
5404
5405static void
5406set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5407{
5408 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5409 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5410}
5411
5412#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5413static void print_optimize_info(FILE* f, regex_t* reg);
5414#endif
5415
5416static int
5417set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5418{
5419
5420 int r;
5421 NodeOptInfo opt;
5422 OptEnv env;
5423
5424 env.enc = reg->enc;
5425 env.options = reg->options;
5426 env.case_fold_flag = reg->case_fold_flag;
5427 env.scan_env = scan_env;
5428 clear_mml(&env.mmd);
5429
5430 r = optimize_node_left(node, &opt, &env);
5431 if (r) return r;
5432
5433 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5434 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5435 ANCHOR_LOOK_BEHIND);
5436
5437 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5438 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5439
5440 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5441 ANCHOR_PREC_READ_NOT);
5442
5443 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5444 reg->anchor_dmin = opt.len.min;
5445 reg->anchor_dmax = opt.len.max;
5446 }
5447
5448 if (opt.exb.len > 0 || opt.exm.len > 0) {
5449 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5450 if (opt.map.value > 0 &&
5451 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5452 goto set_map;
5453 }
5454 else {
5455 r = set_optimize_exact_info(reg, &opt.exb);
5456 set_sub_anchor(reg, &opt.exb.anc);
5457 }
5458 }
5459 else if (opt.map.value > 0) {
5460 set_map:
5461 set_optimize_map_info(reg, &opt.map);
5462 set_sub_anchor(reg, &opt.map.anc);
5463 }
5464 else {
5465 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5466 if (opt.len.max == 0)
5467 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5468 }
5469
5470#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5471 print_optimize_info(stderr, reg);
5472#endif
5473 return r;
5474}
5475
5476static void
5477clear_optimize_info(regex_t* reg)
5478{
5479 reg->optimize = ONIG_OPTIMIZE_NONE;
5480 reg->anchor = 0;
5481 reg->anchor_dmin = 0;
5482 reg->anchor_dmax = 0;
5483 reg->sub_anchor = 0;
5484 reg->exact_end = (UChar* )NULL;
5485 reg->threshold_len = 0;
5486 if (IS_NOT_NULL(reg->exact)) {
5487 xfree(reg->exact);
5488 reg->exact = (UChar* )NULL;
5489 }
5490}
5491
5492#ifdef ONIG_DEBUG
5493
5494static void print_enc_string(FILE* fp, OnigEncoding enc,
5495 const UChar *s, const UChar *end)
5496{
5497 fprintf(fp, "\nPATTERN: /");
5498
5499 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5500 const UChar *p;
5501 OnigCodePoint code;
5502
5503 p = s;
5504 while (p < end) {
5505 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5506 if (code >= 0x80) {
5507 fprintf(fp, " 0x%04x ", (int )code);
5508 }
5509 else {
5510 fputc((int )code, fp);
5511 }
5512
5513 p += enclen(enc, p, end);
5514 }
5515 }
5516 else {
5517 while (s < end) {
5518 fputc((int )*s, fp);
5519 s++;
5520 }
5521 }
5522
5523 fprintf(fp, "/ (%s)\n", enc->name);
5524}
5525#endif /* ONIG_DEBUG */
5526
5527#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5528static void
5529print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5530{
5531 if (a == ONIG_INFINITE_DISTANCE)
5532 fputs("inf", f);
5533 else
5534 fprintf(f, "(%"PRIuPTR")", a);
5535
5536 fputs("-", f);
5537
5538 if (b == ONIG_INFINITE_DISTANCE)
5539 fputs("inf", f);
5540 else
5541 fprintf(f, "(%"PRIuPTR")", b);
5542}
5543
5544static void
5545print_anchor(FILE* f, int anchor)
5546{
5547 int q = 0;
5548
5549 fprintf(f, "[");
5550
5551 if (anchor & ANCHOR_BEGIN_BUF) {
5552 fprintf(f, "begin-buf");
5553 q = 1;
5554 }
5555 if (anchor & ANCHOR_BEGIN_LINE) {
5556 if (q) fprintf(f, ", ");
5557 q = 1;
5558 fprintf(f, "begin-line");
5559 }
5560 if (anchor & ANCHOR_BEGIN_POSITION) {
5561 if (q) fprintf(f, ", ");
5562 q = 1;
5563 fprintf(f, "begin-pos");
5564 }
5565 if (anchor & ANCHOR_END_BUF) {
5566 if (q) fprintf(f, ", ");
5567 q = 1;
5568 fprintf(f, "end-buf");
5569 }
5570 if (anchor & ANCHOR_SEMI_END_BUF) {
5571 if (q) fprintf(f, ", ");
5572 q = 1;
5573 fprintf(f, "semi-end-buf");
5574 }
5575 if (anchor & ANCHOR_END_LINE) {
5576 if (q) fprintf(f, ", ");
5577 q = 1;
5578 fprintf(f, "end-line");
5579 }
5580 if (anchor & ANCHOR_ANYCHAR_STAR) {
5581 if (q) fprintf(f, ", ");
5582 q = 1;
5583 fprintf(f, "anychar-star");
5584 }
5585 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5586 if (q) fprintf(f, ", ");
5587 fprintf(f, "anychar-star-ml");
5588 }
5589
5590 fprintf(f, "]");
5591}
5592
5593static void
5594print_optimize_info(FILE* f, regex_t* reg)
5595{
5596 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5597 "EXACT_IC", "MAP",
5598 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5599
5600 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5601 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5602 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5603 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5604 fprintf(f, "\n");
5605
5606 if (reg->optimize) {
5607 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5608 fprintf(f, "\n");
5609 }
5610 fprintf(f, "\n");
5611
5612 if (reg->exact) {
5613 UChar *p;
5614 fprintf(f, "exact: [");
5615 for (p = reg->exact; p < reg->exact_end; p++) {
5616 fputc(*p, f);
5617 }
5618 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5619 }
5620 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5621 int c, i, n = 0;
5622
5623 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5624 if (reg->map[i]) n++;
5625
5626 fprintf(f, "map: n=%d\n", n);
5627 if (n > 0) {
5628 c = 0;
5629 fputc('[', f);
5630 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5631 if (reg->map[i] != 0) {
5632 if (c > 0) fputs(", ", f);
5633 c++;
5634 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5635 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5636 fputc(i, f);
5637 else
5638 fprintf(f, "%d", i);
5639 }
5640 }
5641 fprintf(f, "]\n");
5642 }
5643 }
5644}
5645#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5646
5647
5648extern void
5649onig_free_body(regex_t* reg)
5650{
5651 if (IS_NOT_NULL(reg)) {
5652 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5653 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5654 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5655 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5656 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5657 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5658
5659#ifdef USE_NAMED_GROUP
5660 onig_names_free(reg);
5661#endif
5662 }
5663}
5664
5665extern void
5666onig_free(regex_t* reg)
5667{
5668 if (IS_NOT_NULL(reg)) {
5669 onig_free_body(reg);
5670 xfree(reg);
5671 }
5672}
5673
5674#ifdef RUBY
5675size_t
5676onig_memsize(const regex_t *reg)
5677{
5678 size_t size = sizeof(regex_t);
5679 if (IS_NULL(reg)) return 0;
5680 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5681 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5682 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5683 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5684 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5685 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5686
5687 return size;
5688}
5689
5690size_t
5691onig_region_memsize(const OnigRegion *regs)
5692{
5693 size_t size = sizeof(*regs);
5694 if (IS_NULL(regs)) return 0;
5695 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5696 return size;
5697}
5698#endif
5699
5700#define REGEX_TRANSFER(to,from) do {\
5701 onig_free_body(to);\
5702 xmemcpy(to, from, sizeof(regex_t));\
5703 xfree(from);\
5704} while (0)
5705
5706#if 0
5707extern void
5708onig_transfer(regex_t* to, regex_t* from)
5709{
5710 REGEX_TRANSFER(to, from);
5711}
5712#endif
5713
5714#ifdef ONIG_DEBUG_COMPILE
5715static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
5716#endif
5717#ifdef ONIG_DEBUG_PARSE_TREE
5718static void print_tree(FILE* f, Node* node);
5719#endif
5720
5721#ifdef RUBY
5722extern int
5723onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5724 OnigErrorInfo* einfo)
5725{
5726 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5727}
5728#endif
5729
5730#ifdef RUBY
5731extern int
5732onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5733 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5734#else
5735extern int
5736onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5737 OnigErrorInfo* einfo)
5738#endif
5739{
5740#define COMPILE_INIT_SIZE 20
5741
5742 int r;
5743 OnigDistance init_size;
5744 Node* root;
5745 ScanEnv scan_env = {0};
5746#ifdef USE_SUBEXP_CALL
5747 UnsetAddrList uslist;
5748#endif
5749
5750 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5751
5752#ifdef RUBY
5753 scan_env.sourcefile = sourcefile;
5754 scan_env.sourceline = sourceline;
5755#endif
5756
5757#ifdef ONIG_DEBUG
5758 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5759#endif
5760
5761 if (reg->alloc == 0) {
5762 init_size = (pattern_end - pattern) * 2;
5763 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5764 r = BBUF_INIT(reg, init_size);
5765 if (r != 0) goto end;
5766 }
5767 else
5768 reg->used = 0;
5769
5770 reg->num_mem = 0;
5771 reg->num_repeat = 0;
5772 reg->num_null_check = 0;
5773 reg->repeat_range_alloc = 0;
5774 reg->repeat_range = (OnigRepeatRange* )NULL;
5775#ifdef USE_COMBINATION_EXPLOSION_CHECK
5776 reg->num_comb_exp_check = 0;
5777#endif
5778
5779 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5780 if (r != 0) goto err;
5781
5782#ifdef ONIG_DEBUG_PARSE_TREE
5783# if 0
5784 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5785 print_tree(stderr, root);
5786# endif
5787#endif
5788
5789#ifdef USE_NAMED_GROUP
5790 /* mixed use named group and no-named group */
5791 if (scan_env.num_named > 0 &&
5792 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5793 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5794 if (scan_env.num_named != scan_env.num_mem)
5795 r = disable_noname_group_capture(&root, reg, &scan_env);
5796 else
5797 r = numbered_ref_check(root);
5798
5799 if (r != 0) goto err;
5800 }
5801#endif
5802
5803#ifdef USE_SUBEXP_CALL
5804 if (scan_env.num_call > 0) {
5805 r = unset_addr_list_init(&uslist, scan_env.num_call);
5806 if (r != 0) goto err;
5807 scan_env.unset_addr_list = &uslist;
5808 r = setup_subexp_call(root, &scan_env);
5809 if (r != 0) goto err_unset;
5810 r = subexp_recursive_check_trav(root, &scan_env);
5811 if (r < 0) goto err_unset;
5812 r = subexp_inf_recursive_check_trav(root, &scan_env);
5813 if (r != 0) goto err_unset;
5814
5815 reg->num_call = scan_env.num_call;
5816 }
5817 else
5818 reg->num_call = 0;
5819#endif
5820
5821 r = setup_tree(root, reg, 0, &scan_env);
5822 if (r != 0) goto err_unset;
5823
5824#ifdef ONIG_DEBUG_PARSE_TREE
5825 print_tree(stderr, root);
5826#endif
5827
5828 reg->capture_history = scan_env.capture_history;
5829 reg->bt_mem_start = scan_env.bt_mem_start;
5830 reg->bt_mem_start |= reg->capture_history;
5831 if (IS_FIND_CONDITION(reg->options))
5832 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5833 else {
5834 reg->bt_mem_end = scan_env.bt_mem_end;
5835 reg->bt_mem_end |= reg->capture_history;
5836 }
5837
5838#ifdef USE_COMBINATION_EXPLOSION_CHECK
5839 if (scan_env.backrefed_mem == 0
5840# ifdef USE_SUBEXP_CALL
5841 || scan_env.num_call == 0
5842# endif
5843 ) {
5844 setup_comb_exp_check(root, 0, &scan_env);
5845# ifdef USE_SUBEXP_CALL
5846 if (scan_env.has_recursion != 0) {
5847 scan_env.num_comb_exp_check = 0;
5848 }
5849 else
5850# endif
5851 if (scan_env.comb_exp_max_regnum > 0) {
5852 int i;
5853 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5854 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5855 scan_env.num_comb_exp_check = 0;
5856 break;
5857 }
5858 }
5859 }
5860 }
5861
5862 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5863#endif
5864
5865 clear_optimize_info(reg);
5866#ifndef ONIG_DONT_OPTIMIZE
5867 r = set_optimize_info_from_tree(root, reg, &scan_env);
5868 if (r != 0) goto err_unset;
5869#endif
5870
5871 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5872 xfree(scan_env.mem_nodes_dynamic);
5873 scan_env.mem_nodes_dynamic = (Node** )NULL;
5874 }
5875
5876 r = compile_tree(root, reg);
5877 if (r == 0) {
5878 r = add_opcode(reg, OP_END);
5879#ifdef USE_SUBEXP_CALL
5880 if (scan_env.num_call > 0) {
5881 r = unset_addr_list_fix(&uslist, reg);
5882 unset_addr_list_end(&uslist);
5883 if (r) goto err;
5884 }
5885#endif
5886
5887 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5888 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5889 else {
5890 if (reg->bt_mem_start != 0)
5891 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5892 else
5893 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5894 }
5895 }
5896#ifdef USE_SUBEXP_CALL
5897 else if (scan_env.num_call > 0) {
5898 unset_addr_list_end(&uslist);
5899 }
5900#endif
5901 onig_node_free(root);
5902
5903#ifdef ONIG_DEBUG_COMPILE
5904# ifdef USE_NAMED_GROUP
5905 onig_print_names(stderr, reg);
5906# endif
5907 print_compiled_byte_code_list(stderr, reg);
5908#endif
5909
5910 end:
5911 onig_reg_resize(reg);
5912 return r;
5913
5914 err_unset:
5915#ifdef USE_SUBEXP_CALL
5916 if (scan_env.num_call > 0) {
5917 unset_addr_list_end(&uslist);
5918 }
5919#endif
5920 err:
5921 if (IS_NOT_NULL(scan_env.error)) {
5922 if (IS_NOT_NULL(einfo)) {
5923 einfo->enc = scan_env.enc;
5924 einfo->par = scan_env.error;
5925 einfo->par_end = scan_env.error_end;
5926 }
5927 }
5928
5929 onig_node_free(root);
5930 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5931 xfree(scan_env.mem_nodes_dynamic);
5932 return r;
5933}
5934
5935static int onig_inited = 0;
5936
5937extern int
5938onig_reg_init(regex_t* reg, OnigOptionType option,
5939 OnigCaseFoldType case_fold_flag,
5940 OnigEncoding enc, const OnigSyntaxType* syntax)
5941{
5942 if (! onig_inited)
5943 onig_init();
5944
5945 if (IS_NULL(reg))
5946 return ONIGERR_INVALID_ARGUMENT;
5947
5948 if (ONIGENC_IS_UNDEF(enc))
5949 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5950
5951 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5952 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5953 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5954 }
5955
5956 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5957 option |= syntax->options;
5958 option &= ~ONIG_OPTION_SINGLELINE;
5959 }
5960 else
5961 option |= syntax->options;
5962
5963 (reg)->enc = enc;
5964 (reg)->options = option;
5965 (reg)->syntax = syntax;
5966 (reg)->optimize = 0;
5967 (reg)->exact = (UChar* )NULL;
5968 (reg)->int_map = (int* )NULL;
5969 (reg)->int_map_backward = (int* )NULL;
5970 (reg)->chain = (regex_t* )NULL;
5971
5972 (reg)->p = (UChar* )NULL;
5973 (reg)->alloc = 0;
5974 (reg)->used = 0;
5975 (reg)->name_table = (void* )NULL;
5976
5977 (reg)->case_fold_flag = case_fold_flag;
5978
5979 (reg)->timelimit = 0;
5980
5981 return 0;
5982}
5983
5984extern int
5985onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5986 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5987 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5988{
5989 int r;
5990
5991 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5992 if (r) return r;
5993
5994 r = onig_compile(reg, pattern, pattern_end, einfo);
5995 return r;
5996}
5997
5998extern int
5999onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
6000 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
6001 OnigErrorInfo* einfo)
6002{
6003 int r;
6004
6005 *reg = (regex_t* )xmalloc(sizeof(regex_t));
6006 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
6007
6008 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
6009 if (r) goto err;
6010
6011 r = onig_compile(*reg, pattern, pattern_end, einfo);
6012 if (r) {
6013 err:
6014 onig_free(*reg);
6015 *reg = NULL;
6016 }
6017 return r;
6018}
6019
6020extern int
6021onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
6022{
6023 return onig_init();
6024}
6025
6026extern int
6027onig_init(void)
6028{
6029 if (onig_inited != 0)
6030 return 0;
6031
6032 onig_inited = 1;
6033
6034#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6035 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6036#endif
6037
6038 onigenc_init();
6039 /* onigenc_set_default_caseconv_table((UChar* )0); */
6040
6041#ifdef ONIG_DEBUG_STATISTICS
6042 onig_statistics_init();
6043#endif
6044
6045 return 0;
6046}
6047
6048
6049static OnigEndCallListItemType* EndCallTop;
6050
6051extern void onig_add_end_call(void (*func)(void))
6052{
6054
6055 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6056 if (item == 0) return ;
6057
6058 item->next = EndCallTop;
6059 item->func = func;
6060
6061 EndCallTop = item;
6062}
6063
6064static void
6065exec_end_call_list(void)
6066{
6068 void (*func)(void);
6069
6070 while (EndCallTop != 0) {
6071 func = EndCallTop->func;
6072 (*func)();
6073
6074 prev = EndCallTop;
6075 EndCallTop = EndCallTop->next;
6076 xfree(prev);
6077 }
6078}
6079
6080extern int
6081onig_end(void)
6082{
6083 exec_end_call_list();
6084
6085#ifdef ONIG_DEBUG_STATISTICS
6086 onig_print_statistics(stderr);
6087#endif
6088
6089#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6090 _CrtDumpMemoryLeaks();
6091#endif
6092
6093 onig_inited = 0;
6094
6095 return 0;
6096}
6097
6098extern int
6099onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6100{
6101 OnigCodePoint n, *data;
6102 OnigCodePoint low, high, x;
6103
6104 GET_CODE_POINT(n, p);
6105 data = (OnigCodePoint* )p;
6106 data++;
6107
6108 for (low = 0, high = n; low < high; ) {
6109 x = (low + high) >> 1;
6110 if (code > data[x * 2 + 1])
6111 low = x + 1;
6112 else
6113 high = x;
6114 }
6115
6116 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6117}
6118
6119extern int
6120onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6121{
6122 int found;
6123
6124 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6125 if (IS_NULL(cc->mbuf)) {
6126 found = 0;
6127 }
6128 else {
6129 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6130 }
6131 }
6132 else {
6133 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6134 }
6135
6136 if (IS_NCCLASS_NOT(cc))
6137 return !found;
6138 else
6139 return found;
6140}
6141
6142extern int
6143onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6144{
6145 int len;
6146
6147 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6148 len = 2;
6149 }
6150 else {
6151 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6152 }
6153 return onig_is_code_in_cc_len(len, code, cc);
6154}
6155
6156
6157#ifdef ONIG_DEBUG
6158
6159/* arguments type */
6160# define ARG_SPECIAL -1
6161# define ARG_NON 0
6162# define ARG_RELADDR 1
6163# define ARG_ABSADDR 2
6164# define ARG_LENGTH 3
6165# define ARG_MEMNUM 4
6166# define ARG_OPTION 5
6167# define ARG_STATE_CHECK 6
6168
6169OnigOpInfoType OnigOpInfo[] = {
6170 { OP_FINISH, "finish", ARG_NON },
6171 { OP_END, "end", ARG_NON },
6172 { OP_EXACT1, "exact1", ARG_SPECIAL },
6173 { OP_EXACT2, "exact2", ARG_SPECIAL },
6174 { OP_EXACT3, "exact3", ARG_SPECIAL },
6175 { OP_EXACT4, "exact4", ARG_SPECIAL },
6176 { OP_EXACT5, "exact5", ARG_SPECIAL },
6177 { OP_EXACTN, "exactn", ARG_SPECIAL },
6178 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6179 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6180 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6181 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6182 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6183 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6184 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6185 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6186 { OP_CCLASS, "cclass", ARG_SPECIAL },
6187 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6188 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6189 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6190 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6191 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6192 { OP_ANYCHAR, "anychar", ARG_NON },
6193 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6194 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6195 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6196 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6197 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6198 { OP_WORD, "word", ARG_NON },
6199 { OP_NOT_WORD, "not-word", ARG_NON },
6200 { OP_WORD_BOUND, "word-bound", ARG_NON },
6201 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6202 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6203 { OP_WORD_END, "word-end", ARG_NON },
6204 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6205 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6206 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6207 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6208 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6209 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6210 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6211 { OP_END_BUF, "end-buf", ARG_NON },
6212 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6213 { OP_END_LINE, "end-line", ARG_NON },
6214 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6215 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6216 { OP_BACKREF1, "backref1", ARG_NON },
6217 { OP_BACKREF2, "backref2", ARG_NON },
6218 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6219 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6220 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6221 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6222 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6223 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6224 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6225 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6226 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6227 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6228 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6229 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6230 { OP_SET_OPTION, "set-option", ARG_OPTION },
6231 { OP_KEEP, "keep", ARG_NON },
6232 { OP_FAIL, "fail", ARG_NON },
6233 { OP_JUMP, "jump", ARG_RELADDR },
6234 { OP_PUSH, "push", ARG_RELADDR },
6235 { OP_POP, "pop", ARG_NON },
6236 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6237 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6238 { OP_REPEAT, "repeat", ARG_SPECIAL },
6239 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6240 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6241 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6242 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6243 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6244 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6245 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6246 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6247 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6248 { OP_PUSH_POS, "push-pos", ARG_NON },
6249 { OP_POP_POS, "pop-pos", ARG_NON },
6250 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6251 { OP_FAIL_POS, "fail-pos", ARG_NON },
6252 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6253 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6254 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6255 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6256 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6257 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6258 { OP_ABSENT, "absent", ARG_RELADDR },
6259 { OP_ABSENT_END, "absent-end", ARG_NON },
6260 { OP_CALL, "call", ARG_ABSADDR },
6261 { OP_RETURN, "return", ARG_NON },
6262 { OP_CONDITION, "condition", ARG_SPECIAL },
6263 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6264 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6265 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6266 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6267 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6268 "state-check-anychar-ml*", ARG_STATE_CHECK },
6269 { -1, "", ARG_NON }
6270};
6271
6272static const char*
6273op2name(int opcode)
6274{
6275 int i;
6276
6277 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6278 if (opcode == OnigOpInfo[i].opcode)
6279 return OnigOpInfo[i].name;
6280 }
6281 return "";
6282}
6283
6284static int
6285op2arg_type(int opcode)
6286{
6287 int i;
6288
6289 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6290 if (opcode == OnigOpInfo[i].opcode)
6291 return OnigOpInfo[i].arg_type;
6292 }
6293 return ARG_SPECIAL;
6294}
6295
6296# ifdef ONIG_DEBUG_PARSE_TREE
6297static void
6298Indent(FILE* f, int indent)
6299{
6300 int i;
6301 for (i = 0; i < indent; i++) putc(' ', f);
6302}
6303# endif /* ONIG_DEBUG_PARSE_TREE */
6304
6305static void
6306p_string(FILE* f, ptrdiff_t len, UChar* s)
6307{
6308 fputs(":", f);
6309 while (len-- > 0) { fputc(*s++, f); }
6310}
6311
6312static void
6313p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6314{
6315 int x = len * mb_len;
6316
6317 fprintf(f, ":%d:", len);
6318 while (x-- > 0) { fputc(*s++, f); }
6319}
6320
6321extern void
6322onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6323 OnigEncoding enc)
6324{
6325 int i, n, arg_type;
6326 RelAddrType addr;
6327 LengthType len;
6328 MemNumType mem;
6329 StateCheckNumType scn;
6330 OnigCodePoint code;
6331 UChar *q;
6332
6333 fprintf(f, "[%s", op2name(*bp));
6334 arg_type = op2arg_type(*bp);
6335 if (arg_type != ARG_SPECIAL) {
6336 bp++;
6337 switch (arg_type) {
6338 case ARG_NON:
6339 break;
6340 case ARG_RELADDR:
6341 GET_RELADDR_INC(addr, bp);
6342 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6343 break;
6344 case ARG_ABSADDR:
6345 GET_ABSADDR_INC(addr, bp);
6346 fprintf(f, ":(%d)", addr);
6347 break;
6348 case ARG_LENGTH:
6349 GET_LENGTH_INC(len, bp);
6350 fprintf(f, ":%d", len);
6351 break;
6352 case ARG_MEMNUM:
6353 mem = *((MemNumType* )bp);
6354 bp += SIZE_MEMNUM;
6355 fprintf(f, ":%d", mem);
6356 break;
6357 case ARG_OPTION:
6358 {
6359 OnigOptionType option = *((OnigOptionType* )bp);
6360 bp += SIZE_OPTION;
6361 fprintf(f, ":%d", option);
6362 }
6363 break;
6364
6365 case ARG_STATE_CHECK:
6366 scn = *((StateCheckNumType* )bp);
6367 bp += SIZE_STATE_CHECK_NUM;
6368 fprintf(f, ":%d", scn);
6369 break;
6370 }
6371 }
6372 else {
6373 switch (*bp++) {
6374 case OP_EXACT1:
6375 case OP_ANYCHAR_STAR_PEEK_NEXT:
6376 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6377 p_string(f, 1, bp++); break;
6378 case OP_EXACT2:
6379 p_string(f, 2, bp); bp += 2; break;
6380 case OP_EXACT3:
6381 p_string(f, 3, bp); bp += 3; break;
6382 case OP_EXACT4:
6383 p_string(f, 4, bp); bp += 4; break;
6384 case OP_EXACT5:
6385 p_string(f, 5, bp); bp += 5; break;
6386 case OP_EXACTN:
6387 GET_LENGTH_INC(len, bp);
6388 p_len_string(f, len, 1, bp);
6389 bp += len;
6390 break;
6391
6392 case OP_EXACTMB2N1:
6393 p_string(f, 2, bp); bp += 2; break;
6394 case OP_EXACTMB2N2:
6395 p_string(f, 4, bp); bp += 4; break;
6396 case OP_EXACTMB2N3:
6397 p_string(f, 6, bp); bp += 6; break;
6398 case OP_EXACTMB2N:
6399 GET_LENGTH_INC(len, bp);
6400 p_len_string(f, len, 2, bp);
6401 bp += len * 2;
6402 break;
6403 case OP_EXACTMB3N:
6404 GET_LENGTH_INC(len, bp);
6405 p_len_string(f, len, 3, bp);
6406 bp += len * 3;
6407 break;
6408 case OP_EXACTMBN:
6409 {
6410 int mb_len;
6411
6412 GET_LENGTH_INC(mb_len, bp);
6413 GET_LENGTH_INC(len, bp);
6414 fprintf(f, ":%d:%d:", mb_len, len);
6415 n = len * mb_len;
6416 while (n-- > 0) { fputc(*bp++, f); }
6417 }
6418 break;
6419
6420 case OP_EXACT1_IC:
6421 len = enclen(enc, bp, bpend);
6422 p_string(f, len, bp);
6423 bp += len;
6424 break;
6425 case OP_EXACTN_IC:
6426 GET_LENGTH_INC(len, bp);
6427 p_len_string(f, len, 1, bp);
6428 bp += len;
6429 break;
6430
6431 case OP_CCLASS:
6432 n = bitset_on_num((BitSetRef )bp);
6433 bp += SIZE_BITSET;
6434 fprintf(f, ":%d", n);
6435 break;
6436
6437 case OP_CCLASS_NOT:
6438 n = bitset_on_num((BitSetRef )bp);
6439 bp += SIZE_BITSET;
6440 fprintf(f, ":%d", n);
6441 break;
6442
6443 case OP_CCLASS_MB:
6444 case OP_CCLASS_MB_NOT:
6445 GET_LENGTH_INC(len, bp);
6446 q = bp;
6447# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6448 ALIGNMENT_RIGHT(q);
6449# endif
6450 GET_CODE_POINT(code, q);
6451 bp += len;
6452 fprintf(f, ":%d:%d", (int )code, len);
6453 break;
6454
6455 case OP_CCLASS_MIX:
6456 case OP_CCLASS_MIX_NOT:
6457 n = bitset_on_num((BitSetRef )bp);
6458 bp += SIZE_BITSET;
6459 GET_LENGTH_INC(len, bp);
6460 q = bp;
6461# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6462 ALIGNMENT_RIGHT(q);
6463# endif
6464 GET_CODE_POINT(code, q);
6465 bp += len;
6466 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6467 break;
6468
6469 case OP_BACKREFN_IC:
6470 mem = *((MemNumType* )bp);
6471 bp += SIZE_MEMNUM;
6472 fprintf(f, ":%d", mem);
6473 break;
6474
6475 case OP_BACKREF_MULTI_IC:
6476 case OP_BACKREF_MULTI:
6477 fputs(" ", f);
6478 GET_LENGTH_INC(len, bp);
6479 for (i = 0; i < len; i++) {
6480 GET_MEMNUM_INC(mem, bp);
6481 if (i > 0) fputs(", ", f);
6482 fprintf(f, "%d", mem);
6483 }
6484 break;
6485
6486 case OP_BACKREF_WITH_LEVEL:
6487 {
6488 OnigOptionType option;
6489 LengthType level;
6490
6491 GET_OPTION_INC(option, bp);
6492 fprintf(f, ":%d", option);
6493 GET_LENGTH_INC(level, bp);
6494 fprintf(f, ":%d", level);
6495
6496 fputs(" ", f);
6497 GET_LENGTH_INC(len, bp);
6498 for (i = 0; i < len; i++) {
6499 GET_MEMNUM_INC(mem, bp);
6500 if (i > 0) fputs(", ", f);
6501 fprintf(f, "%d", mem);
6502 }
6503 }
6504 break;
6505
6506 case OP_REPEAT:
6507 case OP_REPEAT_NG:
6508 {
6509 mem = *((MemNumType* )bp);
6510 bp += SIZE_MEMNUM;
6511 addr = *((RelAddrType* )bp);
6512 bp += SIZE_RELADDR;
6513 fprintf(f, ":%d:%d", mem, addr);
6514 }
6515 break;
6516
6517 case OP_PUSH_OR_JUMP_EXACT1:
6518 case OP_PUSH_IF_PEEK_NEXT:
6519 addr = *((RelAddrType* )bp);
6520 bp += SIZE_RELADDR;
6521 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
6522 p_string(f, 1, bp);
6523 bp += 1;
6524 break;
6525
6526 case OP_LOOK_BEHIND:
6527 GET_LENGTH_INC(len, bp);
6528 fprintf(f, ":%d", len);
6529 break;
6530
6531 case OP_PUSH_LOOK_BEHIND_NOT:
6532 GET_RELADDR_INC(addr, bp);
6533 GET_LENGTH_INC(len, bp);
6534 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
6535 break;
6536
6537 case OP_STATE_CHECK_PUSH:
6538 case OP_STATE_CHECK_PUSH_OR_JUMP:
6539 scn = *((StateCheckNumType* )bp);
6540 bp += SIZE_STATE_CHECK_NUM;
6541 addr = *((RelAddrType* )bp);
6542 bp += SIZE_RELADDR;
6543 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
6544 break;
6545
6546 case OP_CONDITION:
6547 GET_MEMNUM_INC(mem, bp);
6548 GET_RELADDR_INC(addr, bp);
6549 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
6550 break;
6551
6552 default:
6553 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6554 bp[-1]);
6555 }
6556 }
6557 fputs("]", f);
6558 if (nextp) *nextp = bp;
6559}
6560
6561# ifdef ONIG_DEBUG_COMPILE
6562static void
6563print_compiled_byte_code_list(FILE* f, regex_t* reg)
6564{
6565 int ncode;
6566 UChar* bp = reg->p;
6567 UChar* end = reg->p + reg->used;
6568
6569 fprintf(f, "code length: %d", reg->used);
6570
6571 ncode = -1;
6572 while (bp < end) {
6573 ncode++;
6574 if (ncode % 5 == 0)
6575 fprintf(f, "\n%ld:", bp - reg->p);
6576 else
6577 fprintf(f, " %ld:", bp - reg->p);
6578 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6579 }
6580
6581 fprintf(f, "\n");
6582}
6583# endif /* ONIG_DEBUG_COMPILE */
6584
6585# ifdef ONIG_DEBUG_PARSE_TREE
6586static void
6587print_indent_tree(FILE* f, Node* node, int indent)
6588{
6589 int i, type, container_p = 0;
6590 int add = 3;
6591 UChar* p;
6592
6593 Indent(f, indent);
6594 if (IS_NULL(node)) {
6595 fprintf(f, "ERROR: null node!!!\n");
6596 exit (0);
6597 }
6598
6599 type = NTYPE(node);
6600 switch (type) {
6601 case NT_LIST:
6602 case NT_ALT:
6603 if (NTYPE(node) == NT_LIST)
6604 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6605 else
6606 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6607
6608 print_indent_tree(f, NCAR(node), indent + add);
6609 while (IS_NOT_NULL(node = NCDR(node))) {
6610 if (NTYPE(node) != type) {
6611 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6612 exit(0);
6613 }
6614 print_indent_tree(f, NCAR(node), indent + add);
6615 }
6616 break;
6617
6618 case NT_STR:
6619 fprintf(f, "<string%s:%"PRIxPTR">",
6620 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6621 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6622 if (*p >= 0x20 && *p < 0x7f)
6623 fputc(*p, f);
6624 else {
6625 fprintf(f, " 0x%02x", *p);
6626 }
6627 }
6628 break;
6629
6630 case NT_CCLASS:
6631 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6632 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
6633 if (NCCLASS(node)->mbuf) {
6634 BBuf* bbuf = NCCLASS(node)->mbuf;
6635 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6636 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6637 fprintf(f, "%d", *data++);
6638 for (; data < end; data+=2) {
6639 fprintf(f, ",");
6640 fprintf(f, "%04x-%04x", data[0], data[1]);
6641 }
6642 }
6643 break;
6644
6645 case NT_CTYPE:
6646 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6647 switch (NCTYPE(node)->ctype) {
6648 case ONIGENC_CTYPE_WORD:
6649 if (NCTYPE(node)->not != 0)
6650 fputs("not word", f);
6651 else
6652 fputs("word", f);
6653 break;
6654
6655 default:
6656 fprintf(f, "ERROR: undefined ctype.\n");
6657 exit(0);
6658 }
6659 break;
6660
6661 case NT_CANY:
6662 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6663 break;
6664
6665 case NT_ANCHOR:
6666 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6667 switch (NANCHOR(node)->type) {
6668 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6669 case ANCHOR_END_BUF: fputs("end buf", f); break;
6670 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6671 case ANCHOR_END_LINE: fputs("end line", f); break;
6672 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6673 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6674
6675 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6676 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6677# ifdef USE_WORD_BEGIN_END
6678 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6679 case ANCHOR_WORD_END: fputs("word end", f); break;
6680# endif
6681 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6682 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6683 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6684 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6685 case ANCHOR_KEEP: fputs("keep",f); break;
6686
6687 default:
6688 fprintf(f, "ERROR: undefined anchor type.\n");
6689 break;
6690 }
6691 break;
6692
6693 case NT_BREF:
6694 {
6695 int* p;
6696 BRefNode* br = NBREF(node);
6697 p = BACKREFS_P(br);
6698 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6699 for (i = 0; i < br->back_num; i++) {
6700 if (i > 0) fputs(", ", f);
6701 fprintf(f, "%d", p[i]);
6702 }
6703 }
6704 break;
6705
6706# ifdef USE_SUBEXP_CALL
6707 case NT_CALL:
6708 {
6709 CallNode* cn = NCALL(node);
6710 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6711 p_string(f, cn->name_end - cn->name, cn->name);
6712 }
6713 break;
6714# endif
6715
6716 case NT_QTFR:
6717 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6718 NQTFR(node)->lower, NQTFR(node)->upper,
6719 (NQTFR(node)->greedy ? "" : "?"));
6720 print_indent_tree(f, NQTFR(node)->target, indent + add);
6721 break;
6722
6723 case NT_ENCLOSE:
6724 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6725 switch (NENCLOSE(node)->type) {
6726 case ENCLOSE_OPTION:
6727 fprintf(f, "option:%d", NENCLOSE(node)->option);
6728 break;
6729 case ENCLOSE_MEMORY:
6730 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6731 break;
6732 case ENCLOSE_STOP_BACKTRACK:
6733 fprintf(f, "stop-bt");
6734 break;
6735 case ENCLOSE_CONDITION:
6736 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6737 break;
6738 case ENCLOSE_ABSENT:
6739 fprintf(f, "absent");
6740 break;
6741
6742 default:
6743 break;
6744 }
6745 fprintf(f, "\n");
6746 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6747 break;
6748
6749 default:
6750 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6751 break;
6752 }
6753
6754 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6755 type != NT_ENCLOSE)
6756 fprintf(f, "\n");
6757
6758 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6759
6760 fflush(f);
6761}
6762
6763static void
6764print_tree(FILE* f, Node* node)
6765{
6766 print_indent_tree(f, node, 0);
6767}
6768# endif /* ONIG_DEBUG_PARSE_TREE */
6769#endif /* ONIG_DEBUG */
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition: xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
Definition: regint.h:458