Ruby 3.2.1p31 (2023-02-08 revision 31819e82c88c6f8ecfaeb162519bfa26a14b21fd)
regexec.c
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
33#ifdef RUBY
34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35#else
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37#endif
38
39#ifndef USE_TOKEN_THREADED_VM
40# ifdef __GNUC__
41# define USE_TOKEN_THREADED_VM 1
42# else
43# define USE_TOKEN_THREADED_VM 0
44# endif
45#endif
46
47#ifdef RUBY
48# define ENC_DUMMY_FLAG (1<<24)
49static inline int
50rb_enc_asciicompat(OnigEncoding enc)
51{
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53}
54# undef ONIGENC_IS_MBC_ASCII_WORD
55# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
56 (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
57 onigenc_ascii_is_code_ctype( \
58 ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59#endif /* RUBY */
60
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
62# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
63 (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
64 ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
65# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
66 is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67static int
68is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
70{
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
79 }
80 else
81 return 1;
82 }
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
92 }
93 }
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96 }
97}
98#else /* USE_CRNL_AS_LINE_TERMINATOR */
99# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
100 ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234#ifdef USE_CACHE_MATCH_OPT
235
236/* count number of jump-like opcodes for allocation of cache memory. */
237static OnigPosition
238count_num_cache_opcode(regex_t* reg, long* num, long* table_size)
239{
240 UChar* p = reg->p;
241 UChar* pend = p + reg->used;
242 LengthType len;
243 MemNumType mem;
244 MemNumType current_mem = -1;
245 long current_mem_num = 0;
246 OnigEncoding enc = reg->enc;
247
248 *num = 0;
249 *table_size = 0;
250
251 while (p < pend) {
252 switch (*p++) {
253 case OP_FINISH:
254 case OP_END:
255 break;
256
257 case OP_EXACT1: p++; break;
258 case OP_EXACT2: p += 2; break;
259 case OP_EXACT3: p += 3; break;
260 case OP_EXACT4: p += 4; break;
261 case OP_EXACT5: p += 5; break;
262 case OP_EXACTN:
263 GET_LENGTH_INC(len, p); p += len; break;
264 case OP_EXACTMB2N1: p += 2; break;
265 case OP_EXACTMB2N2: p += 4; break;
266 case OP_EXACTMB2N3: p += 6; break;
267 case OP_EXACTMB2N:
268 GET_LENGTH_INC(len, p); p += len * 2; break;
269 case OP_EXACTMB3N:
270 GET_LENGTH_INC(len, p); p += len * 3; break;
271 case OP_EXACTMBN:
272 {
273 int mb_len;
274 GET_LENGTH_INC(mb_len, p);
275 GET_LENGTH_INC(len, p);
276 p += mb_len * len;
277 }
278 break;
279
280 case OP_EXACT1_IC:
281 len = enclen(enc, p, pend); p += len; break;
282 case OP_EXACTN_IC:
283 GET_LENGTH_INC(len, p); p += len; break;
284
285 case OP_CCLASS:
286 case OP_CCLASS_NOT:
287 p += SIZE_BITSET; break;
288 case OP_CCLASS_MB:
289 case OP_CCLASS_MB_NOT:
290 GET_LENGTH_INC(len, p); p += len; break;
291 case OP_CCLASS_MIX:
292 case OP_CCLASS_MIX_NOT:
293 p += SIZE_BITSET;
294 GET_LENGTH_INC(len, p);
295 p += len;
296 break;
297
298 case OP_ANYCHAR:
299 case OP_ANYCHAR_ML:
300 break;
301 case OP_ANYCHAR_STAR:
302 case OP_ANYCHAR_ML_STAR:
303 *num += 1; *table_size += 1; break;
304 case OP_ANYCHAR_STAR_PEEK_NEXT:
305 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
306 p++; *num += 1; *table_size += 1; break;
307
308 case OP_WORD:
309 case OP_NOT_WORD:
310 case OP_WORD_BOUND:
311 case OP_NOT_WORD_BOUND:
312 case OP_WORD_BEGIN:
313 case OP_WORD_END:
314 break;
315
316 case OP_ASCII_WORD:
317 case OP_NOT_ASCII_WORD:
318 case OP_ASCII_WORD_BOUND:
319 case OP_NOT_ASCII_WORD_BOUND:
320 case OP_ASCII_WORD_BEGIN:
321 case OP_ASCII_WORD_END:
322 break;
323
324 case OP_BEGIN_BUF:
325 case OP_END_BUF:
326 case OP_BEGIN_LINE:
327 case OP_END_LINE:
328 case OP_SEMI_END_BUF:
329 case OP_BEGIN_POSITION:
330 break;
331
332 case OP_BACKREF1:
333 case OP_BACKREF2:
334 case OP_BACKREFN:
335 case OP_BACKREFN_IC:
336 case OP_BACKREF_MULTI:
337 case OP_BACKREF_MULTI_IC:
338 case OP_BACKREF_WITH_LEVEL:
339 goto fail;
340
341 case OP_MEMORY_START:
342 case OP_MEMORY_START_PUSH:
343 case OP_MEMORY_END_PUSH:
344 case OP_MEMORY_END_PUSH_REC:
345 case OP_MEMORY_END:
346 case OP_MEMORY_END_REC:
347 p += SIZE_MEMNUM; break;
348
349 case OP_KEEP:
350 break;
351
352 case OP_FAIL:
353 break;
354 case OP_JUMP:
355 p += SIZE_RELADDR;
356 break;
357 case OP_PUSH:
358 p += SIZE_RELADDR;
359 *num += 1;
360 *table_size += 1;
361 break;
362 case OP_POP:
363 break;
364 case OP_PUSH_OR_JUMP_EXACT1:
365 case OP_PUSH_IF_PEEK_NEXT:
366 p += SIZE_RELADDR + 1; *num += 1; *table_size += 1; break;
367 case OP_REPEAT:
368 case OP_REPEAT_NG:
369 if (current_mem != -1) {
370 // A nested OP_REPEAT is not yet supported.
371 goto fail;
372 }
373 GET_MEMNUM_INC(mem, p);
374 p += SIZE_RELADDR;
375 if (reg->repeat_range[mem].lower == 0) {
376 *num += 1;
377 *table_size += 1;
378 }
379 reg->repeat_range[mem].base_num = *num;
380 current_mem = mem;
381 current_mem_num = *num;
382 break;
383 case OP_REPEAT_INC:
384 case OP_REPEAT_INC_NG:
385 GET_MEMNUM_INC(mem, p);
386 if (mem != current_mem) {
387 // A lone or invalid OP_REPEAT_INC is found.
388 goto fail;
389 }
390 {
391 long inner_num = *num - current_mem_num;
392 OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
393 repeat_range->inner_num = inner_num;
394 *num -= inner_num;
395 *num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
396 if (repeat_range->lower < repeat_range->upper) {
397 *table_size += 1;
398 }
399 current_mem = -1;
400 current_mem_num = 0;
401 }
402 break;
403 case OP_REPEAT_INC_SG:
404 case OP_REPEAT_INC_NG_SG:
405 // TODO: Support nested OP_REPEAT.
406 goto fail;
407 case OP_NULL_CHECK_START:
408 case OP_NULL_CHECK_END:
409 case OP_NULL_CHECK_END_MEMST:
410 case OP_NULL_CHECK_END_MEMST_PUSH:
411 p += SIZE_MEMNUM; break;
412
413 case OP_PUSH_POS:
414 case OP_POP_POS:
415 case OP_PUSH_POS_NOT:
416 case OP_FAIL_POS:
417 case OP_PUSH_STOP_BT:
418 case OP_POP_STOP_BT:
419 case OP_LOOK_BEHIND:
420 case OP_PUSH_LOOK_BEHIND_NOT:
421 case OP_FAIL_LOOK_BEHIND_NOT:
422 case OP_PUSH_ABSENT_POS:
423 case OP_ABSENT_END:
424 case OP_ABSENT:
425 goto fail;
426
427 case OP_CALL:
428 case OP_RETURN:
429 goto fail;
430
431 case OP_CONDITION:
432 goto fail;
433
434 case OP_STATE_CHECK_PUSH:
435 case OP_STATE_CHECK_PUSH_OR_JUMP:
436 case OP_STATE_CHECK:
437 case OP_STATE_CHECK_ANYCHAR_STAR:
438 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
439 goto fail;
440
441 case OP_SET_OPTION_PUSH:
442 case OP_SET_OPTION:
443 p += SIZE_OPTION;
444 break;
445
446 default:
447 goto bytecode_error;
448 }
449 }
450
451 return 0;
452
453fail:
454 *num = NUM_CACHE_OPCODE_FAIL;
455 return 0;
456
457bytecode_error:
458 return ONIGERR_UNDEFINED_BYTECODE;
459}
460
461static OnigPosition
462init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
463{
464 UChar* pbegin;
465 UChar* p = reg->p;
466 UChar* pend = p + reg->used;
467 LengthType len;
468 MemNumType mem;
469 MemNumType current_mem = -1;
470 long num = 0;
471 long current_mem_num = 0;
472 OnigEncoding enc = reg->enc;
473
474 while (p < pend) {
475 pbegin = p;
476 switch (*p++) {
477 case OP_FINISH:
478 case OP_END:
479 break;
480
481 case OP_EXACT1: p++; break;
482 case OP_EXACT2: p += 2; break;
483 case OP_EXACT3: p += 3; break;
484 case OP_EXACT4: p += 4; break;
485 case OP_EXACT5: p += 5; break;
486 case OP_EXACTN:
487 GET_LENGTH_INC(len, p); p += len; break;
488 case OP_EXACTMB2N1: p += 2; break;
489 case OP_EXACTMB2N2: p += 4; break;
490 case OP_EXACTMB2N3: p += 6; break;
491 case OP_EXACTMB2N:
492 GET_LENGTH_INC(len, p); p += len * 2; break;
493 case OP_EXACTMB3N:
494 GET_LENGTH_INC(len, p); p += len * 3; break;
495 case OP_EXACTMBN:
496 {
497 int mb_len;
498 GET_LENGTH_INC(mb_len, p);
499 GET_LENGTH_INC(len, p);
500 p += mb_len * len;
501 }
502 break;
503
504 case OP_EXACT1_IC:
505 len = enclen(enc, p, pend); p += len; break;
506 case OP_EXACTN_IC:
507 GET_LENGTH_INC(len, p); p += len; break;
508
509 case OP_CCLASS:
510 case OP_CCLASS_NOT:
511 p += SIZE_BITSET; break;
512 case OP_CCLASS_MB:
513 case OP_CCLASS_MB_NOT:
514 GET_LENGTH_INC(len, p); p += len; break;
515 case OP_CCLASS_MIX:
516 case OP_CCLASS_MIX_NOT:
517 p += SIZE_BITSET;
518 GET_LENGTH_INC(len, p);
519 p += len;
520 break;
521
522 case OP_ANYCHAR:
523 case OP_ANYCHAR_ML:
524 break;
525 case OP_ANYCHAR_STAR:
526 case OP_ANYCHAR_ML_STAR:
527 table->addr = pbegin;
528 table->num = num - current_mem_num;
529 table->outer_repeat = current_mem;
530 num++;
531 table++;
532 break;
533 case OP_ANYCHAR_STAR_PEEK_NEXT:
534 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
535 p++;
536 table->addr = pbegin;
537 table->num = num - current_mem_num;
538 table->outer_repeat = current_mem;
539 num++;
540 table++;
541 break;
542
543 case OP_WORD:
544 case OP_NOT_WORD:
545 case OP_WORD_BOUND:
546 case OP_NOT_WORD_BOUND:
547 case OP_WORD_BEGIN:
548 case OP_WORD_END:
549 break;
550
551 case OP_ASCII_WORD:
552 case OP_NOT_ASCII_WORD:
553 case OP_ASCII_WORD_BOUND:
554 case OP_NOT_ASCII_WORD_BOUND:
555 case OP_ASCII_WORD_BEGIN:
556 case OP_ASCII_WORD_END:
557 break;
558
559 case OP_BEGIN_BUF:
560 case OP_END_BUF:
561 case OP_BEGIN_LINE:
562 case OP_END_LINE:
563 case OP_SEMI_END_BUF:
564 case OP_BEGIN_POSITION:
565 break;
566
567 case OP_BACKREF1:
568 case OP_BACKREF2:
569 case OP_BACKREFN:
570 case OP_BACKREFN_IC:
571 case OP_BACKREF_MULTI:
572 case OP_BACKREF_MULTI_IC:
573 case OP_BACKREF_WITH_LEVEL:
574 goto unexpected_bytecode_error;
575
576 case OP_MEMORY_START:
577 case OP_MEMORY_START_PUSH:
578 case OP_MEMORY_END_PUSH:
579 case OP_MEMORY_END_PUSH_REC:
580 case OP_MEMORY_END:
581 case OP_MEMORY_END_REC:
582 p += SIZE_MEMNUM; break;
583
584 case OP_KEEP:
585 break;
586
587 case OP_FAIL:
588 break;
589 case OP_JUMP:
590 p += SIZE_RELADDR;
591 break;
592 case OP_PUSH:
593 p += SIZE_RELADDR;
594 table->addr = pbegin;
595 table->num = num - current_mem_num;
596 table->outer_repeat = current_mem;
597 num++;
598 table++;
599 break;
600 case OP_POP:
601 break;
602 case OP_PUSH_OR_JUMP_EXACT1:
603 case OP_PUSH_IF_PEEK_NEXT:
604 p += SIZE_RELADDR + 1;
605 table->addr = pbegin;
606 table->num = num - current_mem_num;
607 table->outer_repeat = current_mem;
608 num++;
609 table++;
610 break;
611 case OP_REPEAT:
612 case OP_REPEAT_NG:
613 GET_MEMNUM_INC(mem, p);
614 p += SIZE_RELADDR;
615 if (reg->repeat_range[mem].lower == 0) {
616 table->addr = pbegin;
617 table->num = num - current_mem_num;
618 table->outer_repeat = -1;
619 num++;
620 table++;
621 }
622 current_mem = mem;
623 current_mem_num = num;
624 break;
625 case OP_REPEAT_INC:
626 case OP_REPEAT_INC_NG:
627 GET_MEMNUM_INC(mem, p);
628 {
629 long inner_num = num - current_mem_num;
630 OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
631 if (repeat_range->lower < repeat_range->upper) {
632 table->addr = pbegin;
633 table->num = num - current_mem_num;
634 table->outer_repeat = mem;
635 table++;
636 }
637 num -= inner_num;
638 num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
639 current_mem = -1;
640 current_mem_num = 0;
641 }
642 break;
643 case OP_REPEAT_INC_SG:
644 case OP_REPEAT_INC_NG_SG:
645 // TODO: support OP_REPEAT opcodes.
646 goto unexpected_bytecode_error;
647 case OP_NULL_CHECK_START:
648 case OP_NULL_CHECK_END:
649 case OP_NULL_CHECK_END_MEMST:
650 case OP_NULL_CHECK_END_MEMST_PUSH:
651 p += SIZE_MEMNUM; break;
652
653 case OP_PUSH_POS:
654 case OP_POP_POS:
655 case OP_PUSH_POS_NOT:
656 case OP_FAIL_POS:
657 case OP_PUSH_STOP_BT:
658 case OP_POP_STOP_BT:
659 case OP_LOOK_BEHIND:
660 case OP_PUSH_LOOK_BEHIND_NOT:
661 case OP_FAIL_LOOK_BEHIND_NOT:
662 case OP_PUSH_ABSENT_POS:
663 case OP_ABSENT_END:
664 case OP_ABSENT:
665 goto unexpected_bytecode_error;
666
667 case OP_CALL:
668 case OP_RETURN:
669 goto unexpected_bytecode_error;
670
671 case OP_CONDITION:
672 goto unexpected_bytecode_error;
673
674 case OP_STATE_CHECK_PUSH:
675 case OP_STATE_CHECK_PUSH_OR_JUMP:
676 case OP_STATE_CHECK:
677 case OP_STATE_CHECK_ANYCHAR_STAR:
678 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
679 goto unexpected_bytecode_error;
680
681 case OP_SET_OPTION_PUSH:
682 case OP_SET_OPTION:
683 p += SIZE_OPTION;
684 break;
685
686 default:
687 goto bytecode_error;
688 }
689 }
690
691 return 0;
692
693unexpected_bytecode_error:
694 return ONIGERR_UNEXPECTED_BYTECODE;
695
696bytecode_error:
697 return ONIGERR_UNDEFINED_BYTECODE;
698}
#else /* USE_CACHE_MATCH_OPT */
700static OnigPosition
701count_num_cache_opcode(regex_t* reg, long* num, long* table_size)
702{
703 *num = NUM_CACHE_OPCODE_FAIL;
704 return 0;
705}
706#endif
707
708extern int
709onig_check_linear_time(OnigRegexType* reg)
710{
711 long num = 0, table_size = 0;
712 count_num_cache_opcode(reg, &num, &table_size);
713 return num != NUM_CACHE_OPCODE_FAIL;
714}
715
716extern void
717onig_region_clear(OnigRegion* region)
718{
719 int i;
720
721 for (i = 0; i < region->num_regs; i++) {
722 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
723 }
724#ifdef USE_CAPTURE_HISTORY
725 history_root_free(region);
726#endif
727}
728
729extern int
730onig_region_resize(OnigRegion* region, int n)
731{
732 region->num_regs = n;
733
734 if (n < ONIG_NREGION)
735 n = ONIG_NREGION;
736
737 if (region->allocated == 0) {
738 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
739 if (region->beg == 0)
740 return ONIGERR_MEMORY;
741
742 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
743 if (region->end == 0) {
744 xfree(region->beg);
745 return ONIGERR_MEMORY;
746 }
747
748 region->allocated = n;
749 }
750 else if (region->allocated < n) {
751 OnigPosition *tmp;
752
753 region->allocated = 0;
754 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
755 if (tmp == 0) {
756 xfree(region->beg);
757 xfree(region->end);
758 return ONIGERR_MEMORY;
759 }
760 region->beg = tmp;
761 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
762 if (tmp == 0) {
763 xfree(region->beg);
764 xfree(region->end);
765 return ONIGERR_MEMORY;
766 }
767 region->end = tmp;
768
769 region->allocated = n;
770 }
771
772 return 0;
773}
774
775static int
776onig_region_resize_clear(OnigRegion* region, int n)
777{
778 int r;
779
780 r = onig_region_resize(region, n);
781 if (r != 0) return r;
782 onig_region_clear(region);
783 return 0;
784}
785
786extern int
787onig_region_set(OnigRegion* region, int at, int beg, int end)
788{
789 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
790
791 if (at >= region->allocated) {
792 int r = onig_region_resize(region, at + 1);
793 if (r < 0) return r;
794 }
795
796 region->beg[at] = beg;
797 region->end[at] = end;
798 return 0;
799}
800
801extern void
802onig_region_init(OnigRegion* region)
803{
804 region->num_regs = 0;
805 region->allocated = 0;
806 region->beg = (OnigPosition* )0;
807 region->end = (OnigPosition* )0;
808#ifdef USE_CAPTURE_HISTORY
809 region->history_root = (OnigCaptureTreeNode* )0;
810#endif
811}
812
813extern OnigRegion*
814onig_region_new(void)
815{
816 OnigRegion* r;
817
818 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
819 if (r)
820 onig_region_init(r);
821 return r;
822}
823
824extern void
825onig_region_free(OnigRegion* r, int free_self)
826{
827 if (r) {
828 if (r->allocated > 0) {
829 if (r->beg) xfree(r->beg);
830 if (r->end) xfree(r->end);
831 r->allocated = 0;
832 }
833#ifdef USE_CAPTURE_HISTORY
834 history_root_free(r);
835#endif
836 if (free_self) xfree(r);
837 }
838}
839
840extern void
841onig_region_copy(OnigRegion* to, const OnigRegion* from)
842{
843#define RREGC_SIZE (sizeof(int) * from->num_regs)
844 int i, r;
845
846 if (to == from) return;
847
848 r = onig_region_resize(to, from->num_regs);
849 if (r) return;
850
851 for (i = 0; i < from->num_regs; i++) {
852 to->beg[i] = from->beg[i];
853 to->end[i] = from->end[i];
854 }
855 to->num_regs = from->num_regs;
856
857#ifdef USE_CAPTURE_HISTORY
858 history_root_free(to);
859
860 if (IS_NOT_NULL(from->history_root)) {
861 to->history_root = history_tree_clone(from->history_root);
862 }
863#endif
864}
865
866
#define INVALID_STACK_INDEX      -1

/*
 * Match stack entry types.
 */

/* used by normal-POP */
#define STK_ALT                  0x0001
#define STK_LOOK_BEHIND_NOT      0x0002
#define STK_POS_NOT              0x0003

/* handled by normal-POP */
#define STK_MEM_START            0x0100
#define STK_MEM_END              0x8200
#define STK_REPEAT_INC           0x0300
#define STK_STATE_CHECK_MARK     0x1000

/* avoided by normal-POP */
#define STK_NULL_CHECK_START     0x3000
#define STK_NULL_CHECK_END       0x5000  /* for recursive call */
#define STK_MEM_END_MARK         0x8400
#define STK_POS                  0x0500  /* used when POP-POS */
#define STK_STOP_BT              0x0600  /* mark for "(?>...)" */
#define STK_REPEAT               0x0700
#define STK_CALL_FRAME           0x0800
#define STK_RETURN               0x0900
#define STK_VOID                 0x0a00  /* for fill a blank */
#define STK_ABSENT_POS           0x0b00  /* for absent */
#define STK_ABSENT               0x0c00  /* absent inner loop marker */

/*
 * Masks applied to an entry's type.
 */
#define STK_MASK_POP_USED           0x00ff
#define STK_MASK_TO_VOID_TARGET     0x10ff
#define STK_MASK_MEM_END_OR_MARK    0x8000  /* MEM_END or MEM_END_MARK */
897
/*
 * Cache-match-optimization part of the match argument (msa).
 * INIT resets the cache fields to "nothing allocated / not yet counted";
 * FREE releases the two buffers if they were allocated.  Both expand to
 * nothing when USE_CACHE_MATCH_OPT is not defined.
 */
#ifdef USE_CACHE_MATCH_OPT
#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\
  (msa).enable_cache_match_opt = 0;\
  (msa).num_fail               = 0;\
  (msa).num_cache_opcode       = NUM_CACHE_OPCODE_UNINIT;\
  (msa).num_cache_table        = 0;\
  (msa).cache_index_table      = (OnigCacheIndex *)0;\
  (msa).match_cache            = (uint8_t *)0;\
} while(0)
#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\
  if ((msa).cache_index_table) xfree((msa).cache_index_table);\
  if ((msa).match_cache)       xfree((msa).match_cache);\
} while(0)
#else
#define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa)
#define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa)
#endif
915
/*
 * Initialises a match argument (msa) from the option, region, start and
 * gpos values given.  The stack pointer, counter and end_time start at
 * zero; the USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE variant additionally
 * resets best_len to ONIG_MISMATCH.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
} while(0)
#else
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
} while(0)
#endif
940
#ifdef USE_COMBINATION_EXPLOSION_CHECK

/* Buffers of at least this many bytes come from xmalloc, smaller ones from
 * xalloca (see STATE_CHECK_BUFF_INIT / MATCH_ARG_FREE). */
# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16

/*
 * Sets up (msa).state_check_buff as a bit buffer of
 * ((str_len + 1) * state_num + 7) >> 3 bytes and zeroes it from the byte
 * offset derived from OFFSET onwards.  NOTE: OFFSET is overwritten by the
 * macro.  When the string is shorter than the threshold, state_num is not
 * positive or the size is out of range, the buffer is left NULL with size 0.
 * Contains a hidden return via CHECK_NULL_RETURN_MEMERR.
 */
# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
  if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
    offset = ((offset) * (state_num)) >> 3;\
    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
        (msa).state_check_buff = (void* )xmalloc(size);\
        CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
      }\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
              (size_t )(size - (offset))); \
      (msa).state_check_buff_size = size;\
    }\
    else {\
      (msa).state_check_buff = (void* )0;\
      (msa).state_check_buff_size = 0;\
    }\
  }\
  else {\
    (msa).state_check_buff = (void* )0;\
    (msa).state_check_buff_size = 0;\
  }\
  } while(0)

/* Releases the heap stack, the state-check buffer (only when it was large
 * enough to have come from xmalloc) and the cache buffers of msa. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    if ((msa).state_check_buff) xfree((msa).state_check_buff);\
  }\
  MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
} while(0)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
/* Releases the heap stack and the cache buffers of msa. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
} while (0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
984
985
986
/* Above this many OnigStackIndex slots STACK_INIT takes them from the heap. */
#define MAX_PTR_NUM 100

/*
 * Sets up the index area (alloc_addr) and the match stack (stk_alloc,
 * stk_base, stk, stk_end) of the enclosing function; relies on the local
 * names msa, stk_alloc, stk_base, stk and stk_end.
 *
 *  - ptr_num > MAX_PTR_NUM: index area from xmalloc (heap_addr records it);
 *    stack is msa->stack_p if set, else a fresh xalloca block.
 *  - otherwise, msa->stack_p set: index area from xalloca, stack reused.
 *  - otherwise: one xalloca block holding the index area followed by the
 *    stack.
 */
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)  do {\
  if (ptr_num > MAX_PTR_NUM) {\
    alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = alloc_addr;\
    if (msa->stack_p) {\
      stk_alloc = (OnigStackType* )(msa->stack_p);\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + msa->stack_n;\
    } else {\
      stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + (stack_num);\
    }\
  } else if (msa->stack_p) {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(msa->stack_p);\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
                       + sizeof(OnigStackType) * (stack_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0)

/* Remembers the current stack in msa when it is no longer the block that
 * STACK_INIT started with (stk_base != stk_alloc). */
#define STACK_SAVE do{\
  if (stk_base != stk_alloc) {\
    msa->stack_p = stk_base;\
    msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
  };\
} while(0)
1029
1030static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1031
1032extern unsigned int
1033onig_get_match_stack_limit_size(void)
1034{
1035 return MatchStackLimitSize;
1036}
1037
1038extern int
1039onig_set_match_stack_limit_size(unsigned int size)
1040{
1041 MatchStackLimitSize = size;
1042 return 0;
1043}
1044
1045static int
1046stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1047 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1048{
1049 size_t n;
1050 OnigStackType *x, *stk_base, *stk_end, *stk;
1051
1052 stk_base = *arg_stk_base;
1053 stk_end = *arg_stk_end;
1054 stk = *arg_stk;
1055
1056 n = stk_end - stk_base;
1057 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1058 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1059 if (IS_NULL(x)) {
1060 STACK_SAVE;
1061 return ONIGERR_MEMORY;
1062 }
1063 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1064 n *= 2;
1065 }
1066 else {
1067 unsigned int limit_size = MatchStackLimitSize;
1068 n *= 2;
1069 if (limit_size != 0 && n > limit_size) {
1070 if ((unsigned int )(stk_end - stk_base) == limit_size)
1071 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1072 else
1073 n = limit_size;
1074 }
1075 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1076 if (IS_NULL(x)) {
1077 STACK_SAVE;
1078 return ONIGERR_MEMORY;
1079 }
1080 }
1081 *arg_stk = x + (stk - stk_base);
1082 *arg_stk_base = x;
1083 *arg_stk_end = x + n;
1084 return 0;
1085}
1086
/*
 * Makes sure the stack has room for n more entries, growing it with
 * stack_double() if necessary.  On failure the stack is saved, xmalloc_base
 * is freed when set, and the enclosing function returns the error code.
 */
#define STACK_ENSURE(n) do {\
  if (stk_end - stk < (n)) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) {\
      STACK_SAVE;\
      if (xmalloc_base) xfree(xmalloc_base);\
      return r;\
    }\
  }\
} while(0)

/* Conversions between stack entry pointers and indexes from stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)

/* Pushes an entry that carries only a type; null_check is inherited from
 * the entry below (0 for the first entry). */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  STACK_INC;\
} while(0)

/* True when the entry's type has any bit of STK_MASK_TO_VOID_TARGET set. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1109
/*
 * State pushing macros, in two flavours selected by
 * USE_COMBINATION_EXPLOSION_CHECK.
 *
 * Every push stores stk->null_check, inherited from the entry below (0 for
 * the first entry).  STACK_PUSH_TYPE reads (stk-1)->null_check regardless
 * of the previous entry's type, so each entry must have the field
 * initialised; the combination-explosion variants therefore set it too.
 */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit position of (string position s, state number snum) in the
 * state-check buffer; uses the locals str and num_comb_exp_check. */
# define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))
/* Reads the state-check bit for the current position s into v
 * (0 when there is no buffer). */
# define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff != NULL) {\
    ptrdiff_t x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
  else (v) = 0;\
} while(0)


/* else-branch for pop loops: sets the state-check bit recorded in a
 * STK_STATE_CHECK_MARK entry. */
# define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    ptrdiff_t x = STATE_CHECK_POS((stk)->u.state.pstr, (stk)->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8)); \
  }

# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but the caller has already ensured room. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

# define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)

#else /* USE_COMBINATION_EXPLOSION_CHECK */

# define ELSE_IF_STATE_CHECK_MARK(stk)

# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but the caller has already ensured room. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1189
/* Convenience wrappers: each pushes one stack entry of a fixed type.
 * STACK_PUSH_POS stores NULL_UCHARP as its pattern position. */
#define STACK_PUSH_ALT(pat,s,sprev,keep) \
    STACK_PUSH(STK_ALT,pat,s,sprev,keep)
#define STACK_PUSH_POS(s,sprev,keep) \
    STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) \
    STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
#define STACK_PUSH_ABSENT \
    STACK_PUSH_TYPE(STK_ABSENT)
#define STACK_PUSH_STOP_BT \
    STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
    STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1197
1198#ifdef USE_CACHE_MATCH_OPT
1199
/* Match-cache hook.  When `enable` is true, look up the cache index of the
 * opcode at `p`; if one exists, combine it with string position `pos` into
 * a bit number (num_cache_size bits per position).  If that bit is already
 * set, jump to `fail`; otherwise set it and continue. */
#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do {\
  if (enable) {\
    long cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p));\
    if (cache_index >= 0) {\
      long key = (num_cache_size) * (long)(pos) + cache_index;\
      long index = key >> 3;         /* byte holding the bit */\
      long mask = 1 << (key & 7);    /* bit inside that byte */\
      if (((match_cache)[index] & mask) != 0) goto fail;\
      (match_cache)[index] |= mask;\
    }\
  }\
} while (0)
1214
1215static long
1216find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, long num_cache_table, UChar* p)
1217{
1218 long l = 0, r = num_cache_table - 1, m = 0;
1219 OnigCacheIndex* item;
1220 OnigRepeatRange* range;
1221 OnigStackType *stkp;
1222 int count = 0;
1223 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
1224
1225 while (l <= r) {
1226 m = (l + r) / 2;
1227 if (table[m].addr == p) break;
1228 if (table[m].addr < p) l = m + 1;
1229 else r = m - 1;
1230 }
1231
1232 if (!(0 <= m && m < num_cache_table && table[m].addr == p)) {
1233 return -1;
1234 }
1235
1236 item = &table[m];
1237 if (item->outer_repeat == -1) {
1238 return item->num;
1239 }
1240
1241 range = &reg->repeat_range[item->outer_repeat];
1242
1243 stkp = &stk[repeat_stk[item->outer_repeat]];
1244 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
1245
1246 if (count < range->lower) {
1247 return range->base_num + range->inner_num * count + item->num;
1248 }
1249
1250 if (range->upper == 0x7fffffff) {
1251 return range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item->num;
1252 }
1253
1254 return range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (count - range->lower) + item->num;
1255}
1256
1257static void
1258reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, long pos, uint8_t* match_cache, OnigCacheIndex *table, long num_cache_size, long num_cache_table)
1259{
1260 long l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0;
1261 int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG;
1262 OnigCacheIndex *item1, *item2;
1263 long k1, k2, base;
1264
1265 while (l <= r) {
1266 m1 = (l + r) / 2;
1267 if (table[m1].addr == pbegin) break;
1268 if (table[m1].addr < pbegin) l = m1 + 1;
1269 else r = m1 - 1;
1270 }
1271
1272 l = 0, r = num_cache_table - 1;
1273 while (l <= r) {
1274 m2 = (l + r) / 2;
1275 if (table[m2].addr == pend) break;
1276 if (table[m2].addr < pend) l = m2 + 1;
1277 else r = m2 - 1;
1278 }
1279
1280 if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++;
1281 if (table[m2].addr > pend && m2 - 1 > 0) m2--;
1282
1283 item1 = &table[m1];
1284 item2 = &table[m2];
1285
1286 if (item1->outer_repeat < 0) k1 = item1->num;
1287 else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num;
1288
1289 if (item2->outer_repeat < 0) k2 = item2->num;
1290 else {
1291 OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat];
1292 if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num;
1293 else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num;
1294 }
1295
1296 base = pos * num_cache_size;
1297 k1 += base;
1298 k2 += base;
1299
1300 if ((k1 >> 3) == (k2 >> 3)) {
1301 match_cache[k1 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1);
1302 } else {
1303 long i = k1 >> 3;
1304 if (k1 & 7) {
1305 match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1;
1306 i++;
1307 }
1308 if (i < (k2 >> 3)) {
1309 xmemset(&match_cache[i], 0, (k2 >> 3) - i);
1310 if (k2 & 7) {
1311 match_cache[k2 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1));
1312 }
1313 }
1314 }
1315}
1316
1317#else
1318#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache)
1319#endif /* USE_CACHE_MATCH_OPT */
1320
/* Push an STK_REPEAT entry for repeat number `id` whose body starts at
 * `pat`; the iteration count starts at 0.  null_check is inherited from
 * the entry below (0 at the stack bottom). */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->u.repeat.num    = (id);\
  stk->u.repeat.pcode  = (pat);\
  stk->u.repeat.count  = 0;\
  STACK_INC;\
} while(0)
1330
/* Push an STK_REPEAT_INC entry remembering `sindex`, the stack index of the
 * STK_REPEAT entry whose count was incremented; the pop loops use it to
 * decrement that count again. */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->type = STK_REPEAT_INC;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->u.repeat_inc.si = (sindex);\
  STACK_INC;\
} while(0)
1338
/* Push an STK_MEM_START entry for group `mnum` at string position `s`.
 * The previous mem_start_stk/mem_end_stk values are saved in the entry so
 * the pop loops can restore them; afterwards the group's start points at
 * this entry and its end is marked invalid. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_START;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->u.mem.num      = (mnum);\
  stk->u.mem.pstr     = (s);\
  /* save the old table values before overwriting them */\
  stk->u.mem.start    = mem_start_stk[mnum];\
  stk->u.mem.end      = mem_end_stk[mnum];\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)
1351
/* Push an STK_MEM_END entry for group `mnum` at string position `s`.
 * The previous mem_start_stk/mem_end_stk values are saved in the entry;
 * afterwards the group's end points at this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->u.mem.num    = (mnum);\
  stk->u.mem.pstr   = (s);\
  /* save the old table values before overwriting the end */\
  stk->u.mem.start  = mem_start_stk[mnum];\
  stk->u.mem.end    = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)
1363
/* Push an STK_MEM_END_MARK entry for group `mnum`.  Only the group number
 * is recorded; the mem_start_stk/mem_end_stk tables are not touched. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_MEM_END_MARK;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->u.mem.num = (mnum);\
  STACK_INC;\
} while(0)
1371
/* Walk down from the stack top and leave `k` on the STK_MEM_START entry of
 * group `mnum` that is not paired with an end entry (or end mark) of the
 * same group seen on the way down.  If none is found, the walk stops with
 * k == stk_base. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  for (k = stk; k > stk_base; ) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      level++;   /* an end of this group: one more start to skip */\
    }\
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)
1387
/* Walk up from `k` towards the stack top and extract the string range of
 * group `mnum`: `start` is taken from the first (outermost) STK_MEM_START
 * of the group, `end` from the STK_MEM_END that brings the nesting level
 * back to 0.  `k` is left on that end entry, or at stk if none is found. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  for (; k < stk; k++) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
  }\
} while(0)
1405
/* Push an STK_NULL_CHECK_START entry for check number `cnum` at string
 * position `s`.  Its null_check is its own stack index, so entries pushed
 * later (which inherit the value) point back at it. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_START;\
  stk->null_check = (OnigStackIndex)(stk - stk_base);\
  stk->u.null_check.num   = (cnum);\
  stk->u.null_check.pstr  = (s);\
  STACK_INC;\
} while(0)
1414
/* Push an STK_NULL_CHECK_END entry for check number `cnum`.  Like the
 * start entry, it records its own stack index in null_check. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type = STK_NULL_CHECK_END;\
  stk->null_check = (OnigStackIndex)(stk - stk_base);\
  stk->u.null_check.num   = (cnum);\
  STACK_INC;\
} while(0)
1422
/* Push an STK_CALL_FRAME entry holding `pat`, the pattern address that
 * STACK_RETURN hands back as the return address. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type = STK_CALL_FRAME;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)
1430
/* Push an STK_RETURN entry; it carries no payload and only balances an
 * STK_CALL_FRAME entry for the stack scans that count nesting levels. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  STACK_INC;\
} while(0)
1437
/* Push an STK_ABSENT_POS entry storing the pair (`start`, `end`), to be
 * read back by STACK_POP_ABSENT_POS. */
#define STACK_PUSH_ABSENT_POS(start, end) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ABSENT_POS;\
  stk->null_check = (stk != stk_base) ? (stk-1)->null_check : 0;\
  stk->u.absent_pos.abs_pstr = (start);\
  stk->u.absent_pos.end_pstr = (end);\
  STACK_INC;\
} while(0)
1446
1447
/* Debug-only underflow guard: when pointer `p` has dropped below the stack
 * base, print the label `at` to stderr and jump to stack_error.  Expands
 * to nothing in non-debug builds. */
#ifdef ONIG_DEBUG
# define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    goto stack_error;\
  }
#else /* !ONIG_DEBUG */
# define STACK_BASE_CHECK(p, at)
#endif /* ONIG_DEBUG */
1457
/* Drop exactly one entry from the stack top, whatever its type. */
#define STACK_POP_ONE do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
1462
/* Pop entries until one whose type has a STK_MASK_POP_USED bit is reached;
 * stk is left pointing at that entry.  How much is undone for the skipped
 * entries depends on pop_level:
 *   STACK_POP_LEVEL_FREE       nothing is restored;
 *   STACK_POP_LEVEL_MEM_START  group start/end indexes saved in
 *                              STK_MEM_START entries are restored;
 *   anything else              additionally STK_MEM_END entries are
 *                              restored and STK_REPEAT_INC entries
 *                              decrement the repeat count they refer to.
 * ELSE_IF_STATE_CHECK_MARK must stay directly after the if/else-if chain. */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  default:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break;\
      else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  }\
} while(0)
1506
/* Pop until the nearest STK_POS_NOT entry (stk is left pointing at it).
 * Skipped memory entries restore the saved group indexes and skipped
 * STK_REPEAT_INC entries decrement the repeat count they refer to. */
#define STACK_POP_TIL_POS_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
1526
/* Pop until the nearest STK_LOOK_BEHIND_NOT entry (stk is left pointing at
 * it).  Skipped memory entries restore the saved group indexes and skipped
 * STK_REPEAT_INC entries decrement the repeat count they refer to. */
#define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
1546
/* Pop until the nearest STK_ABSENT entry (stk is left pointing at it).
 * Skipped memory entries restore the saved group indexes and skipped
 * STK_REPEAT_INC entries decrement the repeat count they refer to. */
#define STACK_POP_TIL_ABSENT  do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
    if (stk->type == STK_ABSENT) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)
1566
/* Pop the top entry and read back the pair stored by
 * STACK_PUSH_ABSENT_POS into `start` and `end`.  The entry type is not
 * checked here. */
#define STACK_POP_ABSENT_POS(start, end) do {\
  --stk;\
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (start) = stk->u.absent_pos.abs_pstr;\
  (end)   = stk->u.absent_pos.end_pstr;\
} while(0)
1573
/* Walk down from the stack top to the nearest STK_POS entry, turning that
 * entry and every IS_TO_VOID_TARGET entry above it into STK_VOID.  `k` is
 * left on the former STK_POS entry; stk itself is not moved. */
#define STACK_POS_END(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
      continue;\
    }\
    if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
1588
/* Walk down from the stack top to the nearest STK_STOP_BT entry, turning
 * that entry and every IS_TO_VOID_TARGET entry above it into STK_VOID.
 * stk itself is not moved. */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
      continue;\
    }\
    if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
1603
/* Find the nearest STK_NULL_CHECK_START entry with number `id`, scanning
 * down from the entry that (stk-1)->null_check points at, and set `isnull`
 * to whether the string position recorded there equals `s`. */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      (isnull) = (k->u.null_check.pstr == (s));\
      break;\
    }\
  }\
} while(0)
1617
/* Nesting-aware variant of STACK_NULL_CHECK.  Scans down from the entry
 * that (stk-1)->null_check points at; each STK_NULL_CHECK_END with number
 * `id` opens a nested level that the next STK_NULL_CHECK_START with the
 * same number closes.  The first start entry of `id` seen at level 0
 * decides: `isnull` is set to whether its string position equals `s`.
 *
 * Only end entries of the same `id` are counted, as in
 * STACK_NULL_CHECK_MEMST_REC.  Counting end entries of other ids (whose
 * start entries are never subtracted again) would leave `level` above 0
 * and make the scan run past the start entry it is looking for. */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
1638
/* Null check that also inspects capture state.  Finds the nearest
 * STK_NULL_CHECK_START entry with number `id` (scanning down from the entry
 * that (stk-1)->null_check points at).
 *   - recorded position != s: isnull = 0.
 *   - otherwise isnull starts at 1 and every STK_MEM_START entry between
 *     that start entry and the stack top is examined:
 *       * group end not set, or group start != group end:
 *         isnull = 0, ischange = 1, stop;
 *       * group is empty but its end differs from s: isnull = -1.
 * The group end is read through the stack when the group's bit is set in
 * reg->bt_mem_end, and taken directly from the stored value otherwise. */
#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do {\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type != STK_NULL_CHECK_START) continue;\
    if (k->u.null_check.num != (id)) continue;\
    if (k->u.null_check.pstr != (s)) {\
      (isnull) = 0;\
    }\
    else {\
      UChar* endp;\
      (isnull) = 1;\
      for (; k < stk; k++) {\
        if (k->type != STK_MEM_START) continue;\
        if (k->u.mem.end == INVALID_STACK_INDEX) {\
          (isnull) = 0; (ischange) = 1; break;\
        }\
        if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
          endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
        else\
          endp = (UChar* )k->u.mem.end;\
        if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
          (isnull) = 0; (ischange) = 1; break;\
        }\
        if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
    }\
    break;\
  }\
} while(0)
1677
/* Nesting-aware variant of STACK_NULL_CHECK_MEMST (without the `ischange`
 * output).  While scanning down, each STK_NULL_CHECK_END of `id` raises
 * the level and each STK_NULL_CHECK_START of `id` seen at a non-zero level
 * lowers it; the first start entry of `id` at level 0 is the one examined.
 *   - recorded position != s: isnull = 0.
 *   - otherwise isnull starts at 1 and the STK_MEM_START entries up to the
 *     stack top are examined: an unset end or a non-empty group gives
 *     isnull = 0 and stops; an empty group whose end differs from s gives
 *     isnull = -1. */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
  int level = 0;\
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
      continue;\
    }\
    if (k->type != STK_NULL_CHECK_START) continue;\
    if (k->u.null_check.num != (id)) continue;\
    if (level != 0) {\
      level--;\
      continue;\
    }\
    if (k->u.null_check.pstr != (s)) {\
      (isnull) = 0;\
    }\
    else {\
      UChar* endp;\
      (isnull) = 1;\
      for (; k < stk; k++) {\
        if (k->type != STK_MEM_START) continue;\
        if (k->u.mem.end == INVALID_STACK_INDEX) {\
          (isnull) = 0; break;\
        }\
        if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
          endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
        else\
          endp = (UChar* )k->u.mem.end;\
        if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
          (isnull) = 0; break;\
        }\
        if (endp != s) {\
          (isnull) = -1; /* empty, but position changed */ \
        }\
      }\
    }\
    break;\
  }\
} while(0)
1725
/* Walk down from the stack top and leave `k` on the STK_REPEAT entry with
 * number `id` found at call level 0.  STK_RETURN raises the level and
 * STK_CALL_FRAME lowers it, so repeat entries seen at any other level are
 * ignored. */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) {\
      if (level == 0 && k->u.repeat.num == (id)) break;\
    }\
    else if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN)     level++;\
  }\
} while(0)
1743
/* Find the STK_CALL_FRAME entry that matches the current return and store
 * its return address in `addr`.  Each STK_RETURN seen on the way down
 * means one more call frame has to be skipped. */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      level--;\
    }\
  }\
} while(0)
1761
1762
/* Compare `len` bytes at s1 and s2, jumping to `fail` on the first
 * difference.  All three arguments are modified: the pointers are advanced
 * and `len` is counted down. */
#define STRING_CMP(s1,s2,len) do {\
  for (; len-- > 0; ) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)
1768
/* Case-insensitive comparison through string_cmp_ic(), using the `encode`
 * variable of the expanding scope; jumps to `fail` when it reports a
 * mismatch (returns 0). */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) {\
    goto fail; \
  }\
} while(0)
1773
1774static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1775 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1776{
1777 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1778 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1779 UChar *p1, *p2, *end1, *s2;
1780 int len1, len2;
1781
1782 s2 = *ps2;
1783 end1 = s1 + mblen;
1784 while (s1 < end1) {
1785 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1786 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1787 if (len1 != len2) return 0;
1788 p1 = buf1;
1789 p2 = buf2;
1790 while (len1-- > 0) {
1791 if (*p1 != *p2) return 0;
1792 p1++;
1793 p2++;
1794 }
1795 }
1796
1797 *ps2 = s2;
1798 return 1;
1799}
1800
/* Like STRING_CMP, but reports the outcome in `is_fail` (1 on the first
 * differing byte, 0 otherwise) instead of jumping.  The pointers and `len`
 * are modified. */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  is_fail = 0;\
  for (; len-- > 0; ) {\
    if (*s1++ != *s2++) {\
      is_fail = 1;\
      break;\
    }\
  }\
} while(0)
1809
/* Like STRING_CMP_IC, but stores the outcome in `is_fail` (1 when
 * string_cmp_ic() returns 0, else 0) instead of jumping. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) != 0) \
    is_fail = 0; \
  else \
    is_fail = 1; \
} while(0)
1816
1817
/* Position predicates on the subject string [str, end). */
#define IS_EMPTY_STR           (str == end)
#define ON_STR_BEGIN(s)        ((s) == str)
#define ON_STR_END(s)          ((s) == end)

/* Upper bound for reading subject data: right_range when the match must
 * stay inside the specified range, otherwise the end of the string. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define DATA_ENSURE_LIMIT     right_range
# define ABSENT_END_POS        right_range
#else
# define DATA_ENSURE_LIMIT     end
# define ABSENT_END_POS        end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */

/* Checks that at least one / n bytes are available at `s`.  DATA_ENSURE
 * jumps to `fail` and DATA_ENSURE_CONTINUE issues `continue` when fewer
 * than n bytes remain. */
#define DATA_ENSURE_CHECK1      (s < DATA_ENSURE_LIMIT)
#define DATA_ENSURE_CHECK(n)    (s + (n) <= DATA_ENSURE_LIMIT)
#define DATA_ENSURE(n)          if (s + (n) > DATA_ENSURE_LIMIT) goto fail
#define DATA_ENSURE_CONTINUE(n) if (s + (n) > DATA_ENSURE_LIMIT) continue
1834
1835
1836#ifdef USE_CAPTURE_HISTORY
1837static int
1838make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1839 OnigStackType* stk_top, UChar* str, regex_t* reg)
1840{
1841 int n, r;
1842 OnigCaptureTreeNode* child;
1843 OnigStackType* k = *kp;
1844
1845 while (k < stk_top) {
1846 if (k->type == STK_MEM_START) {
1847 n = k->u.mem.num;
1848 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1849 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1850 child = history_node_new();
1851 CHECK_NULL_RETURN_MEMERR(child);
1852 child->group = n;
1853 child->beg = k->u.mem.pstr - str;
1854 r = history_tree_add_child(node, child);
1855 if (r != 0) {
1856 history_tree_free(child);
1857 return r;
1858 }
1859 *kp = (k + 1);
1860 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1861 if (r != 0) return r;
1862
1863 k = *kp;
1864 child->end = k->u.mem.pstr - str;
1865 }
1866 }
1867 else if (k->type == STK_MEM_END) {
1868 if (k->u.mem.num == node->group) {
1869 node->end = k->u.mem.pstr - str;
1870 *kp = k;
1871 return 0;
1872 }
1873 }
1874 k++;
1875 }
1876
1877 return 1; /* 1: root node ending. */
1878}
1879#endif /* USE_CAPTURE_HISTORY */
1880
1881#ifdef USE_BACKREF_WITH_LEVEL
1882static int
1883mem_is_in_memp(int mem, int num, UChar* memp)
1884{
1885 int i;
1886 MemNumType m;
1887
1888 for (i = 0; i < num; i++) {
1889 GET_MEMNUM_INC(m, memp);
1890 if (mem == (int )m) return 1;
1891 }
1892 return 0;
1893}
1894
1895static int backref_match_at_nested_level(regex_t* reg,
1896 OnigStackType* top, OnigStackType* stk_base,
1897 int ignore_case, int case_fold_flag,
1898 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1899{
1900 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1901 int level;
1902 OnigStackType* k;
1903
1904 level = 0;
1905 k = top;
1906 k--;
1907 while (k >= stk_base) {
1908 if (k->type == STK_CALL_FRAME) {
1909 level--;
1910 }
1911 else if (k->type == STK_RETURN) {
1912 level++;
1913 }
1914 else if (level == nest) {
1915 if (k->type == STK_MEM_START) {
1916 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1917 pstart = k->u.mem.pstr;
1918 if (pend != NULL_UCHARP) {
1919 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1920 p = pstart;
1921 ss = *s;
1922
1923 if (ignore_case != 0) {
1924 if (string_cmp_ic(reg->enc, case_fold_flag,
1925 pstart, &ss, pend - pstart, send) == 0)
1926 return 0; /* or goto next_mem; */
1927 }
1928 else {
1929 while (p < pend) {
1930 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1931 }
1932 }
1933
1934 *s = ss;
1935 return 1;
1936 }
1937 }
1938 }
1939 else if (k->type == STK_MEM_END) {
1940 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1941 pend = k->u.mem.pstr;
1942 }
1943 }
1944 }
1945 k--;
1946 }
1947
1948 return 0;
1949}
1950#endif /* USE_BACKREF_WITH_LEVEL */
1951
1952
1953#ifdef ONIG_DEBUG_STATISTICS
1954
1955# ifdef _WIN32
1956# include <windows.h>
1957static LARGE_INTEGER ts, te, freq;
1958# define GETTIME(t) QueryPerformanceCounter(&(t))
1959# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1960 * 1000000 / freq.QuadPart)
1961# else /* _WIN32 */
1962
1963# define USE_TIMEOFDAY
1964
1965# ifdef USE_TIMEOFDAY
1966# ifdef HAVE_SYS_TIME_H
1967# include <sys/time.h>
1968# endif
1969# ifdef HAVE_UNISTD_H
1970# include <unistd.h>
1971# endif
1972static struct timeval ts, te;
1973# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1974# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1975 (((te).tv_sec - (ts).tv_sec)*1000000))
1976# else /* USE_TIMEOFDAY */
1977# ifdef HAVE_SYS_TIMES_H
1978# include <sys/times.h>
1979# endif
1980static struct tms ts, te;
1981# define GETTIME(t) times(&(t))
1982# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1983# endif /* USE_TIMEOFDAY */
1984
1985# endif /* _WIN32 */
1986
1987static int OpCounter[256];
1988static int OpPrevCounter[256];
1989static unsigned long OpTime[256];
1990static int OpCurr = OP_FINISH;
1991static int OpPrevTarget = OP_FAIL;
1992static int MaxStackDepth = 0;
1993
/* Statistics hook at opcode entry: credit the previous opcode when this one
 * is the watched target, make `opcode` current, count it and sample the
 * start time. */
# define MOP_IN(opcode) do {\
  if (opcode == OpPrevTarget) {\
    OpPrevCounter[OpCurr]++;\
  }\
  OpCurr = opcode;\
  OpCounter[opcode]++;\
  GETTIME(ts);\
} while(0)
2000
/* Statistics hook at opcode exit: sample the end time and add the elapsed
 * time to the current opcode's total. */
# define MOP_OUT do {\
  GETTIME(te);\
  OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
2005
2006extern void
2007onig_statistics_init(void)
2008{
2009 int i;
2010 for (i = 0; i < 256; i++) {
2011 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2012 }
2013 MaxStackDepth = 0;
2014# ifdef _WIN32
2015 QueryPerformanceFrequency(&freq);
2016# endif
2017}
2018
2019extern void
2020onig_print_statistics(FILE* f)
2021{
2022 int i;
2023 fprintf(f, " count prev time\n");
2024 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2025 fprintf(f, "%8d: %8d: %10lu: %s\n",
2026 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2027 }
2028 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2029}
2030
/* Statistics build: advance the stack top and remember the deepest stack
 * seen so far. */
# define STACK_INC do {\
  stk++;\
  if (stk - stk_base > MaxStackDepth) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while(0)
2036
2037#else /* ONIG_DEBUG_STATISTICS */
/* Non-statistics build: STACK_INC only advances the stack top and the
 * per-opcode hooks expand to nothing. */
# define STACK_INC     stk++

# define MOP_IN(opcode)
# define MOP_OUT
2042#endif /* ONIG_DEBUG_STATISTICS */
2043
2044
2045#ifdef ONIG_DEBUG_MATCH
2046static char *
2047stack_type_str(int stack_type)
2048{
2049 switch (stack_type) {
2050 case STK_ALT: return "Alt ";
2051 case STK_LOOK_BEHIND_NOT: return "LBNot ";
2052 case STK_POS_NOT: return "PosNot";
2053 case STK_MEM_START: return "MemS ";
2054 case STK_MEM_END: return "MemE ";
2055 case STK_REPEAT_INC: return "RepInc";
2056 case STK_STATE_CHECK_MARK: return "StChMk";
2057 case STK_NULL_CHECK_START: return "NulChS";
2058 case STK_NULL_CHECK_END: return "NulChE";
2059 case STK_MEM_END_MARK: return "MemEMk";
2060 case STK_POS: return "Pos ";
2061 case STK_STOP_BT: return "StopBt";
2062 case STK_REPEAT: return "Rep ";
2063 case STK_CALL_FRAME: return "Call ";
2064 case STK_RETURN: return "Ret ";
2065 case STK_VOID: return "Void ";
2066 case STK_ABSENT_POS: return "AbsPos";
2067 case STK_ABSENT: return "Absent";
2068 default: return " ";
2069 }
2070}
2071#endif
2072
2073/* match data(str - end) from position (sstart). */
2074/* if sstart == str then set sprev to NULL. */
2075static OnigPosition
2076match_at(regex_t* reg, const UChar* str, const UChar* end,
2077#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2078 const UChar* right_range,
2079#endif
2080 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2081{
2082 static const UChar FinishCode[] = { OP_FINISH };
2083
2084 int i, num_mem, pop_level;
2085 ptrdiff_t n, best_len;
2086 LengthType tlen, tlen2;
2087 MemNumType mem;
2088 RelAddrType addr;
2089 OnigOptionType option = reg->options;
2090 OnigEncoding encode = reg->enc;
2091 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2092 UChar *s, *q, *sbegin;
2093 UChar *p = reg->p;
2094 UChar *pbegin = p;
2095 UChar *pkeep;
2096 char *alloca_base;
2097 char *xmalloc_base = NULL;
2098 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
2099 OnigStackType *stkp; /* used as any purpose. */
2100 OnigStackIndex si;
2101 OnigStackIndex *repeat_stk;
2102 OnigStackIndex *mem_start_stk, *mem_end_stk;
2103#ifdef USE_COMBINATION_EXPLOSION_CHECK
2104 int scv;
2105 unsigned char* state_check_buff = msa->state_check_buff;
2106 int num_comb_exp_check = reg->num_comb_exp_check;
2107#endif
2108
2109#if USE_TOKEN_THREADED_VM
2110# define OP_OFFSET 1
2111# define VM_LOOP JUMP;
2112# define VM_LOOP_END
2113# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2114# define DEFAULT L_DEFAULT:
2115# define NEXT sprev = sbegin; JUMP
2116# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2117
2118 RB_GNUC_EXTENSION static const void *oplabels[] = {
2119 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
2120 &&L_OP_END, /* pattern code terminator (success end) */
2121
2122 &&L_OP_EXACT1, /* single byte, N = 1 */
2123 &&L_OP_EXACT2, /* single byte, N = 2 */
2124 &&L_OP_EXACT3, /* single byte, N = 3 */
2125 &&L_OP_EXACT4, /* single byte, N = 4 */
2126 &&L_OP_EXACT5, /* single byte, N = 5 */
2127 &&L_OP_EXACTN, /* single byte */
2128 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
2129 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
2130 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
2131 &&L_OP_EXACTMB2N, /* mb-length = 2 */
2132 &&L_OP_EXACTMB3N, /* mb-length = 3 */
2133 &&L_OP_EXACTMBN, /* other length */
2134
2135 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
2136 &&L_OP_EXACTN_IC, /* single byte, ignore case */
2137
2138 &&L_OP_CCLASS,
2139 &&L_OP_CCLASS_MB,
2140 &&L_OP_CCLASS_MIX,
2141 &&L_OP_CCLASS_NOT,
2142 &&L_OP_CCLASS_MB_NOT,
2143 &&L_OP_CCLASS_MIX_NOT,
2144
2145 &&L_OP_ANYCHAR, /* "." */
2146 &&L_OP_ANYCHAR_ML, /* "." multi-line */
2147 &&L_OP_ANYCHAR_STAR, /* ".*" */
2148 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
2149 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2150 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2151
2152 &&L_OP_WORD,
2153 &&L_OP_NOT_WORD,
2154 &&L_OP_WORD_BOUND,
2155 &&L_OP_NOT_WORD_BOUND,
2156# ifdef USE_WORD_BEGIN_END
2157 &&L_OP_WORD_BEGIN,
2158 &&L_OP_WORD_END,
2159# else
2160 &&L_DEFAULT,
2161 &&L_DEFAULT,
2162# endif
2163 &&L_OP_ASCII_WORD,
2164 &&L_OP_NOT_ASCII_WORD,
2165 &&L_OP_ASCII_WORD_BOUND,
2166 &&L_OP_NOT_ASCII_WORD_BOUND,
2167# ifdef USE_WORD_BEGIN_END
2168 &&L_OP_ASCII_WORD_BEGIN,
2169 &&L_OP_ASCII_WORD_END,
2170# else
2171 &&L_DEFAULT,
2172 &&L_DEFAULT,
2173# endif
2174
2175 &&L_OP_BEGIN_BUF,
2176 &&L_OP_END_BUF,
2177 &&L_OP_BEGIN_LINE,
2178 &&L_OP_END_LINE,
2179 &&L_OP_SEMI_END_BUF,
2180 &&L_OP_BEGIN_POSITION,
2181
2182 &&L_OP_BACKREF1,
2183 &&L_OP_BACKREF2,
2184 &&L_OP_BACKREFN,
2185 &&L_OP_BACKREFN_IC,
2186 &&L_OP_BACKREF_MULTI,
2187 &&L_OP_BACKREF_MULTI_IC,
2188# ifdef USE_BACKREF_WITH_LEVEL
2189 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
2190# else
2191 &&L_DEFAULT,
2192# endif
2193 &&L_OP_MEMORY_START,
2194 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
2195 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
2196# ifdef USE_SUBEXP_CALL
2197 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
2198# else
2199 &&L_DEFAULT,
2200# endif
2201 &&L_OP_MEMORY_END,
2202# ifdef USE_SUBEXP_CALL
2203 &&L_OP_MEMORY_END_REC, /* push marker to stack */
2204# else
2205 &&L_DEFAULT,
2206# endif
2207
2208 &&L_OP_KEEP,
2209
2210 &&L_OP_FAIL, /* pop stack and move */
2211 &&L_OP_JUMP,
2212 &&L_OP_PUSH,
2213 &&L_OP_POP,
2214# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2215 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
2216# else
2217 &&L_DEFAULT,
2218# endif
2219 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
2220 &&L_OP_REPEAT, /* {n,m} */
2221 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
2222 &&L_OP_REPEAT_INC,
2223 &&L_OP_REPEAT_INC_NG, /* non greedy */
2224 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
2225 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
2226 &&L_OP_NULL_CHECK_START, /* null loop checker start */
2227 &&L_OP_NULL_CHECK_END, /* null loop checker end */
2228# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2229 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2230# else
2231 &&L_DEFAULT,
2232# endif
2233# ifdef USE_SUBEXP_CALL
2234 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2235# else
2236 &&L_DEFAULT,
2237# endif
2238
2239 &&L_OP_PUSH_POS, /* (?=...) start */
2240 &&L_OP_POP_POS, /* (?=...) end */
2241 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
2242 &&L_OP_FAIL_POS, /* (?!...) end */
2243 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
2244 &&L_OP_POP_STOP_BT, /* (?>...) end */
2245 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
2246 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2247 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
2248 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
2249 &&L_OP_ABSENT, /* (?~...) start of inner loop */
2250 &&L_OP_ABSENT_END, /* (?~...) end */
2251
2252# ifdef USE_SUBEXP_CALL
2253 &&L_OP_CALL, /* \g<name> */
2254 &&L_OP_RETURN,
2255# else
2256 &&L_DEFAULT,
2257 &&L_DEFAULT,
2258# endif
2259 &&L_OP_CONDITION,
2260
2261# ifdef USE_COMBINATION_EXPLOSION_CHECK
2262 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
2263 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
2264 &&L_OP_STATE_CHECK, /* check only */
2265# else
2266 &&L_DEFAULT,
2267 &&L_DEFAULT,
2268 &&L_DEFAULT,
2269# endif
2270# ifdef USE_COMBINATION_EXPLOSION_CHECK
2271 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2272 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2273# else
2274 &&L_DEFAULT,
2275 &&L_DEFAULT,
2276# endif
2277 /* no need: IS_DYNAMIC_OPTION() == 0 */
2278# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2279 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
2280 &&L_OP_SET_OPTION /* set option */
2281# else
2282 &&L_DEFAULT,
2283 &&L_DEFAULT
2284# endif
2285 };
2286#else /* USE_TOKEN_THREADED_VM */
2287
2288# define OP_OFFSET 0
2289# define VM_LOOP \
2290 while (1) { \
2291 OPCODE_EXEC_HOOK; \
2292 pbegin = p; \
2293 sbegin = s; \
2294 switch (*p++) {
2295# define VM_LOOP_END } sprev = sbegin; }
2296# define CASE(x) case x:
2297# define DEFAULT default:
2298# define NEXT break
2299# define JUMP continue; break
2300#endif /* USE_TOKEN_THREADED_VM */
2301
2302
2303#ifdef USE_SUBEXP_CALL
2304/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2305 etc. Additional space is required. */
2306# define ADD_NUMMEM 1
2307#else
2308/* Stack #0 is not used. */
2309# define ADD_NUMMEM 0
2310#endif
2311
2312 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2313
2314 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2315 pop_level = reg->stack_pop_level;
2316 num_mem = reg->num_mem;
2317 repeat_stk = (OnigStackIndex* )alloca_base;
2318
2319 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2320 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2321 {
2322 OnigStackIndex *pp = mem_start_stk;
2323 for (; pp < repeat_stk + n; pp += 2) {
2324 pp[0] = INVALID_STACK_INDEX;
2325 pp[1] = INVALID_STACK_INDEX;
2326 }
2327 }
2328#ifndef USE_SUBEXP_CALL
2329 mem_start_stk--; /* for index start from 1,
2330 mem_start_stk[1]..mem_start_stk[num_mem] */
2331 mem_end_stk--; /* for index start from 1,
2332 mem_end_stk[1]..mem_end_stk[num_mem] */
2333#endif
2334
2335#ifdef ONIG_DEBUG_MATCH
2336 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2337 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2338 fprintf(stderr, "size: %d, start offset: %d\n",
2339 (int )(end - str), (int )(sstart - str));
2340 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
2341#endif
2342
2343 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
2344 best_len = ONIG_MISMATCH;
2345 s = (UChar* )sstart;
2346 pkeep = (UChar* )sstart;
2347
2348
2349#ifdef ONIG_DEBUG_MATCH
2350# define OPCODE_EXEC_HOOK \
2351 if (s) { \
2352 UChar *op, *q, *bp, buf[50]; \
2353 int len; \
2354 op = p - OP_OFFSET; \
2355 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2356 bp = buf; \
2357 q = s; \
2358 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2359 for (i = 0; i < 7 && q < end; i++) { \
2360 len = enclen(encode, q, end); \
2361 while (len-- > 0) *bp++ = *q++; \
2362 } \
2363 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2364 } \
2365 xmemcpy(bp, "\"", 1); bp += 1; \
2366 *bp = 0; \
2367 fputs((char* )buf, stderr); \
2368 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2369 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2370 stk - stk_base - 1, \
2371 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2372 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2373 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2374 fprintf(stderr, "\n"); \
2375 }
2376#else
2377# define OPCODE_EXEC_HOOK ((void) 0)
2378#endif
2379
2380
2381 VM_LOOP {
2382 CASE(OP_END) MOP_IN(OP_END);
2383 n = s - sstart;
2384 if (n > best_len) {
2385 OnigRegion* region;
2386#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2387 if (IS_FIND_LONGEST(option)) {
2388 if (n > msa->best_len) {
2389 msa->best_len = n;
2390 msa->best_s = (UChar* )sstart;
2391 }
2392 else
2393 goto end_best_len;
2394 }
2395#endif
2396 best_len = n;
2397 region = msa->region;
2398 if (region) {
2399 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2400 region->end[0] = s - str;
2401 for (i = 1; i <= num_mem; i++) {
2402 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2403 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2404 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2405 else
2406 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2407
2408 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2409 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2410 : (UChar* )((void* )mem_end_stk[i])) - str;
2411 }
2412 else {
2413 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2414 }
2415 }
2416
2417#ifdef USE_CAPTURE_HISTORY
2418 if (reg->capture_history != 0) {
2419 int r;
2420 OnigCaptureTreeNode* node;
2421
2422 if (IS_NULL(region->history_root)) {
2423 region->history_root = node = history_node_new();
2424 CHECK_NULL_RETURN_MEMERR(node);
2425 }
2426 else {
2427 node = region->history_root;
2428 history_tree_clear(node);
2429 }
2430
2431 node->group = 0;
2432 node->beg = ((pkeep > s) ? s : pkeep) - str;
2433 node->end = s - str;
2434
2435 stkp = stk_base;
2436 r = make_capture_history_tree(region->history_root, &stkp,
2437 stk, (UChar* )str, reg);
2438 if (r < 0) {
2439 best_len = r; /* error code */
2440 goto finish;
2441 }
2442 }
2443#endif /* USE_CAPTURE_HISTORY */
2444 } /* if (region) */
2445 } /* n > best_len */
2446
2447#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2448 end_best_len:
2449#endif
2450 MOP_OUT;
2451
2452 if (IS_FIND_CONDITION(option)) {
2453 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2454 best_len = ONIG_MISMATCH;
2455 goto fail; /* for retry */
2456 }
2457 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2458 goto fail; /* for retry */
2459 }
2460 }
2461
2462 /* default behavior: return first-matching result. */
2463 goto finish;
2464 NEXT;
2465
2466 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2467 DATA_ENSURE(1);
2468 if (*p != *s) goto fail;
2469 p++; s++;
2470 MOP_OUT;
2471 NEXT;
2472
2473 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2474 {
2475 int len;
2476 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2477
2478 DATA_ENSURE(1);
2479 len = ONIGENC_MBC_CASE_FOLD(encode,
2480 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2481 case_fold_flag,
2482 &s, end, lowbuf);
2483 DATA_ENSURE(0);
2484 q = lowbuf;
2485 while (len-- > 0) {
2486 if (*p != *q) {
2487 goto fail;
2488 }
2489 p++; q++;
2490 }
2491 }
2492 MOP_OUT;
2493 NEXT;
2494
2495 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2496 DATA_ENSURE(2);
2497 if (*p != *s) goto fail;
2498 p++; s++;
2499 if (*p != *s) goto fail;
2500 sprev = s;
2501 p++; s++;
2502 MOP_OUT;
2503 JUMP;
2504
2505 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2506 DATA_ENSURE(3);
2507 if (*p != *s) goto fail;
2508 p++; s++;
2509 if (*p != *s) goto fail;
2510 p++; s++;
2511 if (*p != *s) goto fail;
2512 sprev = s;
2513 p++; s++;
2514 MOP_OUT;
2515 JUMP;
2516
2517 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2518 DATA_ENSURE(4);
2519 if (*p != *s) goto fail;
2520 p++; s++;
2521 if (*p != *s) goto fail;
2522 p++; s++;
2523 if (*p != *s) goto fail;
2524 p++; s++;
2525 if (*p != *s) goto fail;
2526 sprev = s;
2527 p++; s++;
2528 MOP_OUT;
2529 JUMP;
2530
2531 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2532 DATA_ENSURE(5);
2533 if (*p != *s) goto fail;
2534 p++; s++;
2535 if (*p != *s) goto fail;
2536 p++; s++;
2537 if (*p != *s) goto fail;
2538 p++; s++;
2539 if (*p != *s) goto fail;
2540 p++; s++;
2541 if (*p != *s) goto fail;
2542 sprev = s;
2543 p++; s++;
2544 MOP_OUT;
2545 JUMP;
2546
2547 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2548 GET_LENGTH_INC(tlen, p);
2549 DATA_ENSURE(tlen);
2550 while (tlen-- > 0) {
2551 if (*p++ != *s++) goto fail;
2552 }
2553 sprev = s - 1;
2554 MOP_OUT;
2555 JUMP;
2556
2557 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2558 {
2559 int len;
2560 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2561
2562 GET_LENGTH_INC(tlen, p);
2563 endp = p + tlen;
2564
2565 while (p < endp) {
2566 sprev = s;
2567 DATA_ENSURE(1);
2568 len = ONIGENC_MBC_CASE_FOLD(encode,
2569 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2570 case_fold_flag,
2571 &s, end, lowbuf);
2572 DATA_ENSURE(0);
2573 q = lowbuf;
2574 while (len-- > 0) {
2575 if (*p != *q) goto fail;
2576 p++; q++;
2577 }
2578 }
2579 }
2580
2581 MOP_OUT;
2582 JUMP;
2583
2584 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2585 DATA_ENSURE(2);
2586 if (*p != *s) goto fail;
2587 p++; s++;
2588 if (*p != *s) goto fail;
2589 p++; s++;
2590 MOP_OUT;
2591 NEXT;
2592
2593 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2594 DATA_ENSURE(4);
2595 if (*p != *s) goto fail;
2596 p++; s++;
2597 if (*p != *s) goto fail;
2598 p++; s++;
2599 sprev = s;
2600 if (*p != *s) goto fail;
2601 p++; s++;
2602 if (*p != *s) goto fail;
2603 p++; s++;
2604 MOP_OUT;
2605 JUMP;
2606
2607 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2608 DATA_ENSURE(6);
2609 if (*p != *s) goto fail;
2610 p++; s++;
2611 if (*p != *s) goto fail;
2612 p++; s++;
2613 if (*p != *s) goto fail;
2614 p++; s++;
2615 if (*p != *s) goto fail;
2616 p++; s++;
2617 sprev = s;
2618 if (*p != *s) goto fail;
2619 p++; s++;
2620 if (*p != *s) goto fail;
2621 p++; s++;
2622 MOP_OUT;
2623 JUMP;
2624
2625 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2626 GET_LENGTH_INC(tlen, p);
2627 DATA_ENSURE(tlen * 2);
2628 while (tlen-- > 0) {
2629 if (*p != *s) goto fail;
2630 p++; s++;
2631 if (*p != *s) goto fail;
2632 p++; s++;
2633 }
2634 sprev = s - 2;
2635 MOP_OUT;
2636 JUMP;
2637
2638 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2639 GET_LENGTH_INC(tlen, p);
2640 DATA_ENSURE(tlen * 3);
2641 while (tlen-- > 0) {
2642 if (*p != *s) goto fail;
2643 p++; s++;
2644 if (*p != *s) goto fail;
2645 p++; s++;
2646 if (*p != *s) goto fail;
2647 p++; s++;
2648 }
2649 sprev = s - 3;
2650 MOP_OUT;
2651 JUMP;
2652
2653 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2654 GET_LENGTH_INC(tlen, p); /* mb-len */
2655 GET_LENGTH_INC(tlen2, p); /* string len */
2656 tlen2 *= tlen;
2657 DATA_ENSURE(tlen2);
2658 while (tlen2-- > 0) {
2659 if (*p != *s) goto fail;
2660 p++; s++;
2661 }
2662 sprev = s - tlen;
2663 MOP_OUT;
2664 JUMP;
2665
2666 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2667 DATA_ENSURE(1);
2668 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2669 p += SIZE_BITSET;
2670 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2671 MOP_OUT;
2672 NEXT;
2673
2674 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2675 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2676
2677 cclass_mb:
2678 GET_LENGTH_INC(tlen, p);
2679 {
2680 OnigCodePoint code;
2681 UChar *ss;
2682 int mb_len;
2683
2684 DATA_ENSURE(1);
2685 mb_len = enclen(encode, s, end);
2686 DATA_ENSURE(mb_len);
2687 ss = s;
2688 s += mb_len;
2689 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2690
2691#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2692 if (! onig_is_in_code_range(p, code)) goto fail;
2693#else
2694 q = p;
2695 ALIGNMENT_RIGHT(q);
2696 if (! onig_is_in_code_range(q, code)) goto fail;
2697#endif
2698 }
2699 p += tlen;
2700 MOP_OUT;
2701 NEXT;
2702
2703 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2704 DATA_ENSURE(1);
2705 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2706 p += SIZE_BITSET;
2707 goto cclass_mb;
2708 }
2709 else {
2710 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2711 goto fail;
2712
2713 p += SIZE_BITSET;
2714 GET_LENGTH_INC(tlen, p);
2715 p += tlen;
2716 s++;
2717 }
2718 MOP_OUT;
2719 NEXT;
2720
2721 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2722 DATA_ENSURE(1);
2723 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2724 p += SIZE_BITSET;
2725 s += enclen(encode, s, end);
2726 MOP_OUT;
2727 NEXT;
2728
2729 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2730 DATA_ENSURE(1);
2731 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2732 s++;
2733 GET_LENGTH_INC(tlen, p);
2734 p += tlen;
2735 goto cc_mb_not_success;
2736 }
2737
2738 cclass_mb_not:
2739 GET_LENGTH_INC(tlen, p);
2740 {
2741 OnigCodePoint code;
2742 UChar *ss;
2743 int mb_len = enclen(encode, s, end);
2744
2745 if (! DATA_ENSURE_CHECK(mb_len)) {
2746 DATA_ENSURE(1);
2747 s = (UChar* )end;
2748 p += tlen;
2749 goto cc_mb_not_success;
2750 }
2751
2752 ss = s;
2753 s += mb_len;
2754 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2755
2756#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2757 if (onig_is_in_code_range(p, code)) goto fail;
2758#else
2759 q = p;
2760 ALIGNMENT_RIGHT(q);
2761 if (onig_is_in_code_range(q, code)) goto fail;
2762#endif
2763 }
2764 p += tlen;
2765
2766 cc_mb_not_success:
2767 MOP_OUT;
2768 NEXT;
2769
2770 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2771 DATA_ENSURE(1);
2772 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2773 p += SIZE_BITSET;
2774 goto cclass_mb_not;
2775 }
2776 else {
2777 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2778 goto fail;
2779
2780 p += SIZE_BITSET;
2781 GET_LENGTH_INC(tlen, p);
2782 p += tlen;
2783 s++;
2784 }
2785 MOP_OUT;
2786 NEXT;
2787
2788 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2789 DATA_ENSURE(1);
2790 n = enclen(encode, s, end);
2791 DATA_ENSURE(n);
2792 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2793 s += n;
2794 MOP_OUT;
2795 NEXT;
2796
2797 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2798 DATA_ENSURE(1);
2799 n = enclen(encode, s, end);
2800 DATA_ENSURE(n);
2801 s += n;
2802 MOP_OUT;
2803 NEXT;
2804
2805 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2806 while (DATA_ENSURE_CHECK1) {
2807 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2808 STACK_PUSH_ALT(p, s, sprev, pkeep);
2809 n = enclen(encode, s, end);
2810 DATA_ENSURE(n);
2811 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2812 sprev = s;
2813 s += n;
2814 }
2815 MOP_OUT;
2816 JUMP;
2817
2818 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2819 while (DATA_ENSURE_CHECK1) {
2820 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2821 STACK_PUSH_ALT(p, s, sprev, pkeep);
2822 n = enclen(encode, s, end);
2823 if (n > 1) {
2824 DATA_ENSURE(n);
2825 sprev = s;
2826 s += n;
2827 }
2828 else {
2829 sprev = s;
2830 s++;
2831 }
2832 }
2833 MOP_OUT;
2834 JUMP;
2835
2836 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2837 while (DATA_ENSURE_CHECK1) {
2838 if (*p == *s) {
2839 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
2840 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2841 }
2842 n = enclen(encode, s, end);
2843 DATA_ENSURE(n);
2844 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2845 sprev = s;
2846 s += n;
2847 }
2848 p++;
2849 MOP_OUT;
2850 NEXT;
2851
2852 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2853 while (DATA_ENSURE_CHECK1) {
2854 if (*p == *s) {
2855 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2856 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2857 }
2858 n = enclen(encode, s, end);
2859 if (n > 1) {
2860 DATA_ENSURE(n);
2861 sprev = s;
2862 s += n;
2863 }
2864 else {
2865 sprev = s;
2866 s++;
2867 }
2868 }
2869 p++;
2870 MOP_OUT;
2871 NEXT;
2872
2873#ifdef USE_COMBINATION_EXPLOSION_CHECK
2874 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2875 GET_STATE_CHECK_NUM_INC(mem, p);
2876 while (DATA_ENSURE_CHECK1) {
2877 STATE_CHECK_VAL(scv, mem);
2878 if (scv) goto fail;
2879
2880 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2881 n = enclen(encode, s, end);
2882 DATA_ENSURE(n);
2883 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2884 sprev = s;
2885 s += n;
2886 }
2887 MOP_OUT;
2888 NEXT;
2889
2890 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2891 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2892
2893 GET_STATE_CHECK_NUM_INC(mem, p);
2894 while (DATA_ENSURE_CHECK1) {
2895 STATE_CHECK_VAL(scv, mem);
2896 if (scv) goto fail;
2897
2898 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2899 n = enclen(encode, s, end);
2900 if (n > 1) {
2901 DATA_ENSURE(n);
2902 sprev = s;
2903 s += n;
2904 }
2905 else {
2906 sprev = s;
2907 s++;
2908 }
2909 }
2910 MOP_OUT;
2911 NEXT;
2912#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2913
2914 CASE(OP_WORD) MOP_IN(OP_WORD);
2915 DATA_ENSURE(1);
2916 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2917 goto fail;
2918
2919 s += enclen(encode, s, end);
2920 MOP_OUT;
2921 NEXT;
2922
2923 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2924 DATA_ENSURE(1);
2925 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2926 goto fail;
2927
2928 s += enclen(encode, s, end);
2929 MOP_OUT;
2930 NEXT;
2931
2932 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2933 DATA_ENSURE(1);
2934 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2935 goto fail;
2936
2937 s += enclen(encode, s, end);
2938 MOP_OUT;
2939 NEXT;
2940
2941 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2942 DATA_ENSURE(1);
2943 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2944 goto fail;
2945
2946 s += enclen(encode, s, end);
2947 MOP_OUT;
2948 NEXT;
2949
2950 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2951 if (ON_STR_BEGIN(s)) {
2952 DATA_ENSURE(1);
2953 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2954 goto fail;
2955 }
2956 else if (ON_STR_END(s)) {
2957 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2958 goto fail;
2959 }
2960 else {
2961 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2962 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2963 goto fail;
2964 }
2965 MOP_OUT;
2966 JUMP;
2967
2968 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2969 if (ON_STR_BEGIN(s)) {
2970 DATA_ENSURE(1);
2971 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2972 goto fail;
2973 }
2974 else if (ON_STR_END(s)) {
2975 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2976 goto fail;
2977 }
2978 else {
2979 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2980 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2981 goto fail;
2982 }
2983 MOP_OUT;
2984 JUMP;
2985
2986 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2987 if (ON_STR_BEGIN(s)) {
2988 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2989 goto fail;
2990 }
2991 else if (ON_STR_END(s)) {
2992 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2993 goto fail;
2994 }
2995 else {
2996 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2997 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2998 goto fail;
2999 }
3000 MOP_OUT;
3001 JUMP;
3002
3003 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3004 if (ON_STR_BEGIN(s)) {
3005 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3006 goto fail;
3007 }
3008 else if (ON_STR_END(s)) {
3009 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3010 goto fail;
3011 }
3012 else {
3013 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3014 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3015 goto fail;
3016 }
3017 MOP_OUT;
3018 JUMP;
3019
3020#ifdef USE_WORD_BEGIN_END
3021 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3022 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3023 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3024 MOP_OUT;
3025 JUMP;
3026 }
3027 }
3028 goto fail;
3029 NEXT;
3030
3031 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3032 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3033 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3034 MOP_OUT;
3035 JUMP;
3036 }
3037 }
3038 goto fail;
3039 NEXT;
3040
3041 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3042 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3043 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3044 MOP_OUT;
3045 JUMP;
3046 }
3047 }
3048 goto fail;
3049 NEXT;
3050
3051 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3052 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3053 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3054 MOP_OUT;
3055 JUMP;
3056 }
3057 }
3058 goto fail;
3059 NEXT;
3060#endif
3061
3062 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3063 if (! ON_STR_BEGIN(s)) goto fail;
3064 if (IS_NOTBOS(msa->options)) goto fail;
3065
3066 MOP_OUT;
3067 JUMP;
3068
3069 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3070 if (! ON_STR_END(s)) goto fail;
3071 if (IS_NOTEOS(msa->options)) goto fail;
3072
3073 MOP_OUT;
3074 JUMP;
3075
3076 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3077 if (ON_STR_BEGIN(s)) {
3078 if (IS_NOTBOL(msa->options)) goto fail;
3079 MOP_OUT;
3080 JUMP;
3081 }
3082 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3083#ifdef USE_CRNL_AS_LINE_TERMINATOR
3084 && !(IS_NEWLINE_CRLF(option)
3085 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3086#endif
3087 && !ON_STR_END(s)) {
3088 MOP_OUT;
3089 JUMP;
3090 }
3091 goto fail;
3092 NEXT;
3093
3094 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3095 if (ON_STR_END(s)) {
3096#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3097 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3098#endif
3099 if (IS_NOTEOL(msa->options)) goto fail;
3100 MOP_OUT;
3101 JUMP;
3102#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3103 }
3104#endif
3105 }
3106 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3107 MOP_OUT;
3108 JUMP;
3109 }
3110 goto fail;
3111 NEXT;
3112
3113 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3114 if (ON_STR_END(s)) {
3115#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3116 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3117#endif
3118 if (IS_NOTEOL(msa->options)) goto fail;
3119 MOP_OUT;
3120 JUMP;
3121#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3122 }
3123#endif
3124 }
3125 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3126 UChar* ss = s + enclen(encode, s, end);
3127 if (ON_STR_END(ss)) {
3128 MOP_OUT;
3129 JUMP;
3130 }
3131#ifdef USE_CRNL_AS_LINE_TERMINATOR
3132 else if (IS_NEWLINE_CRLF(option)
3133 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3134 ss += enclen(encode, ss, end);
3135 if (ON_STR_END(ss)) {
3136 MOP_OUT;
3137 JUMP;
3138 }
3139 }
3140#endif
3141 }
3142 goto fail;
3143 NEXT;
3144
3145 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3146 if (s != msa->gpos)
3147 goto fail;
3148
3149 MOP_OUT;
3150 JUMP;
3151
3152 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3153 GET_MEMNUM_INC(mem, p);
3154 STACK_PUSH_MEM_START(mem, s);
3155 MOP_OUT;
3156 JUMP;
3157
3158 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3159 GET_MEMNUM_INC(mem, p);
3160 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3161 mem_end_stk[mem] = INVALID_STACK_INDEX;
3162 MOP_OUT;
3163 JUMP;
3164
3165 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3166 GET_MEMNUM_INC(mem, p);
3167 STACK_PUSH_MEM_END(mem, s);
3168 MOP_OUT;
3169 JUMP;
3170
3171 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3172 GET_MEMNUM_INC(mem, p);
3173 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3174 MOP_OUT;
3175 JUMP;
3176
3177 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3178 pkeep = s;
3179 MOP_OUT;
3180 JUMP;
3181
3182#ifdef USE_SUBEXP_CALL
3183 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3184 GET_MEMNUM_INC(mem, p);
3185 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3186 STACK_PUSH_MEM_END(mem, s);
3187 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3188 MOP_OUT;
3189 JUMP;
3190
3191 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3192 GET_MEMNUM_INC(mem, p);
3193 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3194 STACK_GET_MEM_START(mem, stkp);
3195
3196 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3197 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3198 else
3199 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3200
3201 STACK_PUSH_MEM_END_MARK(mem);
3202 MOP_OUT;
3203 JUMP;
3204#endif
3205
3206 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3207 mem = 1;
3208 goto backref;
3209 NEXT;
3210
3211 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3212 mem = 2;
3213 goto backref;
3214 NEXT;
3215
3216 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3217 GET_MEMNUM_INC(mem, p);
3218 backref:
3219 {
3220 int len;
3221 UChar *pstart, *pend;
3222
3223 /* If you want to remove the following line,
3224 you should perform this check at parse and compile time instead. */
3225 if (mem > num_mem) goto fail;
3226 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3227 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3228
3229 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3230 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3231 else
3232 pstart = (UChar* )((void* )mem_start_stk[mem]);
3233
3234 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3235 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3236 : (UChar* )((void* )mem_end_stk[mem]));
3237 n = pend - pstart;
3238 DATA_ENSURE(n);
3239 sprev = s;
3240 STRING_CMP(pstart, s, n);
3241 while (sprev + (len = enclen(encode, sprev, end)) < s)
3242 sprev += len;
3243
3244 MOP_OUT;
3245 JUMP;
3246 }
3247
3248 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3249 GET_MEMNUM_INC(mem, p);
3250 {
3251 int len;
3252 UChar *pstart, *pend;
3253
3254 /* If you want to remove the following line,
3255 you should perform this check at parse and compile time instead. */
3256 if (mem > num_mem) goto fail;
3257 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3258 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3259
3260 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3261 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3262 else
3263 pstart = (UChar* )((void* )mem_start_stk[mem]);
3264
3265 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3266 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3267 : (UChar* )((void* )mem_end_stk[mem]));
3268 n = pend - pstart;
3269 DATA_ENSURE(n);
3270 sprev = s;
3271 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
3272 while (sprev + (len = enclen(encode, sprev, end)) < s)
3273 sprev += len;
3274
3275 MOP_OUT;
3276 JUMP;
3277 }
3278 NEXT;
3279
3280 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3281 {
3282 int len, is_fail;
3283 UChar *pstart, *pend, *swork;
3284
3285 GET_LENGTH_INC(tlen, p);
3286 for (i = 0; i < tlen; i++) {
3287 GET_MEMNUM_INC(mem, p);
3288
3289 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3290 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3291
3292 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3293 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3294 else
3295 pstart = (UChar* )((void* )mem_start_stk[mem]);
3296
3297 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3298 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3299 : (UChar* )((void* )mem_end_stk[mem]));
3300 n = pend - pstart;
3301 DATA_ENSURE_CONTINUE(n);
3302 sprev = s;
3303 swork = s;
3304 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3305 if (is_fail) continue;
3306 s = swork;
3307 while (sprev + (len = enclen(encode, sprev, end)) < s)
3308 sprev += len;
3309
3310 p += (SIZE_MEMNUM * (tlen - i - 1));
3311 break; /* success */
3312 }
3313 if (i == tlen) goto fail;
3314 MOP_OUT;
3315 JUMP;
3316 }
3317 NEXT;
3318
3319 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3320 {
3321 int len, is_fail;
3322 UChar *pstart, *pend, *swork;
3323
3324 GET_LENGTH_INC(tlen, p);
3325 for (i = 0; i < tlen; i++) {
3326 GET_MEMNUM_INC(mem, p);
3327
3328 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3329 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3330
3331 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3332 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3333 else
3334 pstart = (UChar* )((void* )mem_start_stk[mem]);
3335
3336 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3337 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3338 : (UChar* )((void* )mem_end_stk[mem]));
3339 n = pend - pstart;
3340 DATA_ENSURE_CONTINUE(n);
3341 sprev = s;
3342 swork = s;
3343 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3344 if (is_fail) continue;
3345 s = swork;
3346 while (sprev + (len = enclen(encode, sprev, end)) < s)
3347 sprev += len;
3348
3349 p += (SIZE_MEMNUM * (tlen - i - 1));
3350 break; /* success */
3351 }
3352 if (i == tlen) goto fail;
3353 MOP_OUT;
3354 JUMP;
3355 }
3356
3357#ifdef USE_BACKREF_WITH_LEVEL
3358 CASE(OP_BACKREF_WITH_LEVEL)
3359 {
3360 int len;
3361 OnigOptionType ic;
3362 LengthType level;
3363
3364 GET_OPTION_INC(ic, p);
3365 GET_LENGTH_INC(level, p);
3366 GET_LENGTH_INC(tlen, p);
3367
3368 sprev = s;
3369 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3370 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3371 while (sprev + (len = enclen(encode, sprev, end)) < s)
3372 sprev += len;
3373
3374 p += (SIZE_MEMNUM * tlen);
3375 }
3376 else
3377 goto fail;
3378
3379 MOP_OUT;
3380 JUMP;
3381 }
3382
3383#endif
3384
3385#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3386 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3387 GET_OPTION_INC(option, p);
3388 STACK_PUSH_ALT(p, s, sprev, pkeep);
3389 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3390 MOP_OUT;
3391 JUMP;
3392
3393 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3394 GET_OPTION_INC(option, p);
3395 MOP_OUT;
3396 JUMP;
3397#endif
3398
3399 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3400 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3401 STACK_PUSH_NULL_CHECK_START(mem, s);
3402 MOP_OUT;
3403 JUMP;
3404
3405 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3406 {
3407 int isnull;
3408
3409 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3410 STACK_NULL_CHECK(isnull, mem, s);
3411 if (isnull) {
3412#ifdef ONIG_DEBUG_MATCH
3413 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3414 (int )mem, (uintptr_t )s, s);
3415#endif
3416 null_check_found:
3417 /* empty loop found, skip the next instruction */
3418 switch (*p++) {
3419 case OP_JUMP:
3420 case OP_PUSH:
3421 p += SIZE_RELADDR;
3422 break;
3423 case OP_REPEAT_INC:
3424 case OP_REPEAT_INC_NG:
3425 case OP_REPEAT_INC_SG:
3426 case OP_REPEAT_INC_NG_SG:
3427 p += SIZE_MEMNUM;
3428 break;
3429 default:
3430 goto unexpected_bytecode_error;
3431 break;
3432 }
3433 }
3434 }
3435 MOP_OUT;
3436 JUMP;
3437
3438#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3439 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3440 {
3441 int isnull;
3442 int ischanged = 0; // set 1 when a loop is empty but memory status is changed.
3443
3444 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3445 STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg);
3446 if (isnull) {
3447# ifdef ONIG_DEBUG_MATCH
3448 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3449 (int )mem, (uintptr_t )s, s);
3450# endif
3451 if (isnull == -1) goto fail;
3452 goto null_check_found;
3453 }
3454# ifdef USE_CACHE_MATCH_OPT
3455 if (ischanged && msa->enable_cache_match_opt) {
3456 RelAddrType rel;
3457 OnigUChar *addr;
3458 int mem;
3459 UChar* tmp = p;
3460 switch (*tmp++) {
3461 case OP_JUMP:
3462 case OP_PUSH:
3463 GET_RELADDR_INC(rel, tmp);
3464 addr = tmp + rel;
3465 break;
3466 case OP_REPEAT_INC:
3467 case OP_REPEAT_INC_NG:
3468 GET_MEMNUM_INC(mem, tmp);
3469 addr = STACK_AT(repeat_stk[mem])->u.repeat.pcode;
3470 break;
3471 default:
3472 goto unexpected_bytecode_error;
3473 }
3474 reset_match_cache(reg, addr, pbegin, (long)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode);
3475 }
3476# endif
3477 }
3478 MOP_OUT;
3479 JUMP;
3480#endif
3481
3482#ifdef USE_SUBEXP_CALL
3483 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3484 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3485 {
3486 int isnull;
3487
3488 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3489# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3490 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3491# else
3492 STACK_NULL_CHECK_REC(isnull, mem, s);
3493# endif
3494 if (isnull) {
3495# ifdef ONIG_DEBUG_MATCH
3496 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3497 (int )mem, (uintptr_t )s, s);
3498# endif
3499 if (isnull == -1) goto fail;
3500 goto null_check_found;
3501 }
3502 else {
3503 STACK_PUSH_NULL_CHECK_END(mem);
3504 }
3505 }
3506 MOP_OUT;
3507 JUMP;
3508#endif
3509
3510 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3511 GET_RELADDR_INC(addr, p);
3512 p += addr;
3513 MOP_OUT;
3514 CHECK_INTERRUPT_IN_MATCH_AT;
3515 JUMP;
3516
3517 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3518 GET_RELADDR_INC(addr, p);
3519 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3520 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3521 MOP_OUT;
3522 JUMP;
3523
3524#ifdef USE_COMBINATION_EXPLOSION_CHECK
3525 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3526 GET_STATE_CHECK_NUM_INC(mem, p);
3527 STATE_CHECK_VAL(scv, mem);
3528 if (scv) goto fail;
3529
3530 GET_RELADDR_INC(addr, p);
3531 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3532 MOP_OUT;
3533 JUMP;
3534
3535 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3536 GET_STATE_CHECK_NUM_INC(mem, p);
3537 GET_RELADDR_INC(addr, p);
3538 STATE_CHECK_VAL(scv, mem);
3539 if (scv) {
3540 p += addr;
3541 }
3542 else {
3543 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3544 }
3545 MOP_OUT;
3546 JUMP;
3547
3548 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3549 GET_STATE_CHECK_NUM_INC(mem, p);
3550 STATE_CHECK_VAL(scv, mem);
3551 if (scv) goto fail;
3552
3553 STACK_PUSH_STATE_CHECK(s, mem);
3554 MOP_OUT;
3555 JUMP;
3556#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3557
3558 CASE(OP_POP) MOP_IN(OP_POP);
3559 STACK_POP_ONE;
3560 /* We need to increment num_fail here, for invoking a cache optimization correctly, */
3561 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3562#ifdef USE_CACHE_MATCH_OPT
3563 msa->num_fail++;
3564#endif
3565 MOP_OUT;
3566 JUMP;
3567
3568#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3569 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3570 GET_RELADDR_INC(addr, p);
3571 if (*p == *s && DATA_ENSURE_CHECK1) {
3572 p++;
3573 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3574 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3575 MOP_OUT;
3576 JUMP;
3577 }
3578 p += (addr + 1);
3579 MOP_OUT;
3580 JUMP;
3581#endif
3582
3583 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3584 GET_RELADDR_INC(addr, p);
3585 if (*p == *s) {
3586 p++;
3587 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3588 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3589 MOP_OUT;
3590 JUMP;
3591 }
3592 p++;
3593 MOP_OUT;
3594 JUMP;
3595
3596 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3597 {
3598 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3599 GET_RELADDR_INC(addr, p);
3600
3601 STACK_ENSURE(1);
3602 repeat_stk[mem] = GET_STACK_INDEX(stk);
3603 STACK_PUSH_REPEAT(mem, p);
3604
3605 if (reg->repeat_range[mem].lower == 0) {
3606 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
3607 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3608 }
3609 }
3610 MOP_OUT;
3611 JUMP;
3612
3613 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3614 {
3615 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3616 GET_RELADDR_INC(addr, p);
3617
3618 STACK_ENSURE(1);
3619 repeat_stk[mem] = GET_STACK_INDEX(stk);
3620 STACK_PUSH_REPEAT(mem, p);
3621
3622 if (reg->repeat_range[mem].lower == 0) {
3623 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3624 STACK_PUSH_ALT(p, s, sprev, pkeep);
3625 p += addr;
3626 }
3627 }
3628 MOP_OUT;
3629 JUMP;
3630
3631 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3632 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3633 si = repeat_stk[mem];
3634 stkp = STACK_AT(si);
3635
3636 repeat_inc:
3637 stkp->u.repeat.count++;
3638 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3639 /* end of repeat. Nothing to do. */
3640 }
3641 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3642 if (*pbegin == OP_REPEAT_INC) {
3643 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3644 }
3645 STACK_PUSH_ALT(p, s, sprev, pkeep);
3646 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3647 }
3648 else {
3649 p = stkp->u.repeat.pcode;
3650 }
3651 STACK_PUSH_REPEAT_INC(si);
3652 MOP_OUT;
3653 CHECK_INTERRUPT_IN_MATCH_AT;
3654 JUMP;
3655
3656 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3657 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3658 STACK_GET_REPEAT(mem, stkp);
3659 si = GET_STACK_INDEX(stkp);
3660 goto repeat_inc;
3661 NEXT;
3662
3663 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3664 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3665 si = repeat_stk[mem];
3666 stkp = STACK_AT(si);
3667
3668 repeat_inc_ng:
3669 stkp->u.repeat.count++;
3670 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3671 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3672 UChar* pcode = stkp->u.repeat.pcode;
3673
3674 STACK_PUSH_REPEAT_INC(si);
3675 if (*pbegin == OP_REPEAT_INC_NG) {
3676 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3677 }
3678 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3679 }
3680 else {
3681 p = stkp->u.repeat.pcode;
3682 STACK_PUSH_REPEAT_INC(si);
3683 }
3684 }
3685 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3686 STACK_PUSH_REPEAT_INC(si);
3687 }
3688 MOP_OUT;
3689 CHECK_INTERRUPT_IN_MATCH_AT;
3690 JUMP;
3691
3692 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3693 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3694 STACK_GET_REPEAT(mem, stkp);
3695 si = GET_STACK_INDEX(stkp);
3696 goto repeat_inc_ng;
3697 NEXT;
3698
3699 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3700 STACK_PUSH_POS(s, sprev, pkeep);
3701 MOP_OUT;
3702 JUMP;
3703
3704 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3705 {
3706 STACK_POS_END(stkp);
3707 s = stkp->u.state.pstr;
3708 sprev = stkp->u.state.pstr_prev;
3709 }
3710 MOP_OUT;
3711 JUMP;
3712
3713 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3714 GET_RELADDR_INC(addr, p);
3715 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3716 MOP_OUT;
3717 JUMP;
3718
3719 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3720 STACK_POP_TIL_POS_NOT;
3721 goto fail;
3722 NEXT;
3723
3724 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3725 STACK_PUSH_STOP_BT;
3726 MOP_OUT;
3727 JUMP;
3728
3729 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3730 STACK_STOP_BT_END;
3731 MOP_OUT;
3732 JUMP;
3733
3734 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3735 GET_LENGTH_INC(tlen, p);
3736 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3737 if (IS_NULL(s)) goto fail;
3738 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3739 MOP_OUT;
3740 JUMP;
3741
3742 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3743 GET_RELADDR_INC(addr, p);
3744 GET_LENGTH_INC(tlen, p);
3745 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3746 if (IS_NULL(q)) {
3747 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3748 If you want to change to fail, replace following line. */
3749 p += addr;
3750 /* goto fail; */
3751 }
3752 else {
3753 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3754 s = q;
3755 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3756 }
3757 MOP_OUT;
3758 JUMP;
3759
3760 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3761 STACK_POP_TIL_LOOK_BEHIND_NOT;
3762 goto fail;
3763 NEXT;
3764
3765 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3766 /* Save the absent-start-pos and the original end-pos. */
3767 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3768 MOP_OUT;
3769 JUMP;
3770
3771 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3772 {
3773 const UChar* aend = ABSENT_END_POS;
3774 UChar* absent;
3775 UChar* selfp = p - 1;
3776
3777 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3778 GET_RELADDR_INC(addr, p);
3779#ifdef ONIG_DEBUG_MATCH
3780 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3781#endif
3782 if ((absent > aend) && (s > absent)) {
3783 /* An empty match occurred in (?~...) at the start point.
3784 * Never match. */
3785 STACK_POP;
3786 goto fail;
3787 }
3788 else if ((s >= aend) && (s > absent)) {
3789 if (s > aend) {
3790 /* Only one (or less) character matched in the last iteration.
3791 * This is not a possible point. */
3792 goto fail;
3793 }
3794 /* All possible points were found. Try matching after (?~...). */
3795 DATA_ENSURE(0);
3796 p += addr;
3797 }
3798 else if (s == end) {
3799 /* At the end of the string, just match with it */
3800 DATA_ENSURE(0);
3801 p += addr;
3802 }
3803 else {
3804 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3805 n = enclen(encode, s, end);
3806 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3807 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3808 STACK_PUSH_ABSENT;
3809 ABSENT_END_POS = aend;
3810 }
3811 }
3812 MOP_OUT;
3813 JUMP;
3814
3815 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3816 /* The pattern inside (?~...) was matched.
3817 * Set the end-pos temporary and go to next iteration. */
3818 if (sprev < ABSENT_END_POS)
3819 ABSENT_END_POS = sprev;
3820#ifdef ONIG_DEBUG_MATCH
3821 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3822#endif
3823 STACK_POP_TIL_ABSENT;
3824 goto fail;
3825 NEXT;
3826
3827#ifdef USE_SUBEXP_CALL
3828 CASE(OP_CALL) MOP_IN(OP_CALL);
3829 GET_ABSADDR_INC(addr, p);
3830 STACK_PUSH_CALL_FRAME(p);
3831 p = reg->p + addr;
3832 MOP_OUT;
3833 JUMP;
3834
3835 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3836 STACK_RETURN(p);
3837 STACK_PUSH_RETURN;
3838 MOP_OUT;
3839 JUMP;
3840#endif
3841
3842 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3843 GET_MEMNUM_INC(mem, p);
3844 GET_RELADDR_INC(addr, p);
3845 if ((mem > num_mem) ||
3846 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3847 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3848 p += addr;
3849 }
3850 MOP_OUT;
3851 JUMP;
3852
3853 CASE(OP_FINISH)
3854 goto finish;
3855 NEXT;
3856
3857 CASE(OP_FAIL)
3858 if (0) {
3859 /* fall */
3860 fail:
3861 MOP_OUT;
3862 }
3863 MOP_IN(OP_FAIL);
3864 STACK_POP;
3865 p = stk->u.state.pcode;
3866 s = stk->u.state.pstr;
3867 sprev = stk->u.state.pstr_prev;
3868 pkeep = stk->u.state.pkeep;
3869
3870#ifdef USE_CACHE_MATCH_OPT
3871 if (++msa->num_fail >= (long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3872 msa->enable_cache_match_opt = 1;
3873 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3874 OnigPosition r = count_num_cache_opcode(reg, &msa->num_cache_opcode, &msa->num_cache_table);
3875 if (r < 0) goto bytecode_error;
3876 }
3877 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) {
3878 msa->enable_cache_match_opt = 0;
3879 goto fail_match_cache_opt;
3880 }
3881 if (msa->cache_index_table == NULL) {
3882 OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_table * sizeof(OnigCacheIndex));
3883 if (table == NULL) {
3884 return ONIGERR_MEMORY;
3885 }
3886 OnigPosition r = init_cache_index_table(reg, table);
3887 if (r < 0) {
3888 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
3889 else goto bytecode_error;
3890 }
3891 msa->cache_index_table = table;
3892 }
3893 size_t len = (end - str) + 1;
3894 size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len;
3895 /* overflow check */
3896 if (match_cache_size8 / len != (size_t)msa->num_cache_opcode) {
3897 return ONIGERR_MEMORY;
3898 }
3899 /* Currently, int is used for the key of match_cache */
3900 if (match_cache_size8 >= LONG_MAX_LIMIT) {
3901 return ONIGERR_MEMORY;
3902 }
3903 size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0);
3904 msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t));
3905 if (msa->match_cache == NULL) {
3906 return ONIGERR_MEMORY;
3907 }
3908 xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t));
3909 }
3910 fail_match_cache_opt:
3911#endif
3912
3913#ifdef USE_COMBINATION_EXPLOSION_CHECK
3914 if (stk->u.state.state_check != 0) {
3915 stk->type = STK_STATE_CHECK_MARK;
3916 stk++;
3917 }
3918#endif
3919
3920 MOP_OUT;
3921 CHECK_INTERRUPT_IN_MATCH_AT;
3922 JUMP;
3923
3924 DEFAULT
3925 goto bytecode_error;
3926 } VM_LOOP_END
3927
3928 finish:
3929 STACK_SAVE;
3930 if (xmalloc_base) xfree(xmalloc_base);
3931 return best_len;
3932
3933#ifdef ONIG_DEBUG
3934 stack_error:
3935 STACK_SAVE;
3936 if (xmalloc_base) xfree(xmalloc_base);
3937 return ONIGERR_STACK_BUG;
3938#endif
3939
3940 bytecode_error:
3941 STACK_SAVE;
3942 if (xmalloc_base) xfree(xmalloc_base);
3943 return ONIGERR_UNDEFINED_BYTECODE;
3944
3945 unexpected_bytecode_error:
3946 STACK_SAVE;
3947 if (xmalloc_base) xfree(xmalloc_base);
3948 return ONIGERR_UNEXPECTED_BYTECODE;
3949}
3950
3951
3952static UChar*
3953slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3954 const UChar* text, const UChar* text_end, UChar* text_range)
3955{
3956 UChar *t, *p, *s, *end;
3957
3958 end = (UChar* )text_end;
3959 end -= target_end - target - 1;
3960 if (end > text_range)
3961 end = text_range;
3962
3963 s = (UChar* )text;
3964
3965 if (enc->max_enc_len == enc->min_enc_len) {
3966 int n = enc->max_enc_len;
3967
3968 while (s < end) {
3969 if (*s == *target) {
3970 p = s + 1;
3971 t = target + 1;
3972 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3973 return s;
3974 }
3975 s += n;
3976 }
3977 return (UChar* )NULL;
3978 }
3979 while (s < end) {
3980 if (*s == *target) {
3981 p = s + 1;
3982 t = target + 1;
3983 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3984 return s;
3985 }
3986 s += enclen(enc, s, text_end);
3987 }
3988
3989 return (UChar* )NULL;
3990}
3991
3992static int
3993str_lower_case_match(OnigEncoding enc, int case_fold_flag,
3994 const UChar* t, const UChar* tend,
3995 const UChar* p, const UChar* end)
3996{
3997 int lowlen;
3998 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3999
4000 while (t < tend) {
4001 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4002 q = lowbuf;
4003 while (lowlen > 0) {
4004 if (*t++ != *q++) return 0;
4005 lowlen--;
4006 }
4007 }
4008
4009 return 1;
4010}
4011
4012static UChar*
4013slow_search_ic(OnigEncoding enc, int case_fold_flag,
4014 UChar* target, UChar* target_end,
4015 const UChar* text, const UChar* text_end, UChar* text_range)
4016{
4017 UChar *s, *end;
4018
4019 end = (UChar* )text_end;
4020 end -= target_end - target - 1;
4021 if (end > text_range)
4022 end = text_range;
4023
4024 s = (UChar* )text;
4025
4026 while (s < end) {
4027 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4028 s, text_end))
4029 return s;
4030
4031 s += enclen(enc, s, text_end);
4032 }
4033
4034 return (UChar* )NULL;
4035}
4036
4037static UChar*
4038slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4039 const UChar* text, const UChar* adjust_text,
4040 const UChar* text_end, const UChar* text_start)
4041{
4042 UChar *t, *p, *s;
4043
4044 s = (UChar* )text_end;
4045 s -= (target_end - target);
4046 if (s > text_start)
4047 s = (UChar* )text_start;
4048 else
4049 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4050
4051 while (s >= text) {
4052 if (*s == *target) {
4053 p = s + 1;
4054 t = target + 1;
4055 while (t < target_end) {
4056 if (*t != *p++)
4057 break;
4058 t++;
4059 }
4060 if (t == target_end)
4061 return s;
4062 }
4063 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4064 }
4065
4066 return (UChar* )NULL;
4067}
4068
4069static UChar*
4070slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4071 UChar* target, UChar* target_end,
4072 const UChar* text, const UChar* adjust_text,
4073 const UChar* text_end, const UChar* text_start)
4074{
4075 UChar *s;
4076
4077 s = (UChar* )text_end;
4078 s -= (target_end - target);
4079 if (s > text_start)
4080 s = (UChar* )text_start;
4081 else
4082 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4083
4084 while (s >= text) {
4085 if (str_lower_case_match(enc, case_fold_flag,
4086 target, target_end, s, text_end))
4087 return s;
4088
4089 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4090 }
4091
4092 return (UChar* )NULL;
4093}
4094
4095#ifndef USE_SUNDAY_QUICK_SEARCH
4096/* Boyer-Moore-Horspool search applied to a multibyte string */
4097static UChar*
4098bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4099 const UChar* text, const UChar* text_end,
4100 const UChar* text_range)
4101{
4102 const UChar *s, *se, *t, *p, *end;
4103 const UChar *tail;
4104 ptrdiff_t skip, tlen1;
4105
4106# ifdef ONIG_DEBUG_SEARCH
4107 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4108 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4109# endif
4110
4111 tail = target_end - 1;
4112 tlen1 = tail - target;
4113 end = text_range;
4114 if (end + tlen1 > text_end)
4115 end = text_end - tlen1;
4116
4117 s = text;
4118
4119 if (IS_NULL(reg->int_map)) {
4120 while (s < end) {
4121 p = se = s + tlen1;
4122 t = tail;
4123 while (*p == *t) {
4124 if (t == target) return (UChar* )s;
4125 p--; t--;
4126 }
4127 skip = reg->map[*se];
4128 t = s;
4129 do {
4130 s += enclen(reg->enc, s, end);
4131 } while ((s - t) < skip && s < end);
4132 }
4133 }
4134 else {
4135# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4136 while (s < end) {
4137 p = se = s + tlen1;
4138 t = tail;
4139 while (*p == *t) {
4140 if (t == target) return (UChar* )s;
4141 p--; t--;
4142 }
4143 skip = reg->int_map[*se];
4144 t = s;
4145 do {
4146 s += enclen(reg->enc, s, end);
4147 } while ((s - t) < skip && s < end);
4148 }
4149# endif
4150 }
4151
4152 return (UChar* )NULL;
4153}
4154
4155/* Boyer-Moore-Horspool search */
4156static UChar*
4157bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4158 const UChar* text, const UChar* text_end, const UChar* text_range)
4159{
4160 const UChar *s, *t, *p, *end;
4161 const UChar *tail;
4162
4163# ifdef ONIG_DEBUG_SEARCH
4164 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4165 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4166# endif
4167
4168 end = text_range + (target_end - target) - 1;
4169 if (end > text_end)
4170 end = text_end;
4171
4172 tail = target_end - 1;
4173 s = text + (target_end - target) - 1;
4174 if (IS_NULL(reg->int_map)) {
4175 while (s < end) {
4176 p = s;
4177 t = tail;
4178# ifdef ONIG_DEBUG_SEARCH
4179 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
4180 (intptr_t )(s - text), s);
4181# endif
4182 while (*p == *t) {
4183 if (t == target) return (UChar* )p;
4184 p--; t--;
4185 }
4186 s += reg->map[*s];
4187 }
4188 }
4189 else { /* see int_map[] */
4190# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4191 while (s < end) {
4192 p = s;
4193 t = tail;
4194 while (*p == *t) {
4195 if (t == target) return (UChar* )p;
4196 p--; t--;
4197 }
4198 s += reg->int_map[*s];
4199 }
4200# endif
4201 }
4202 return (UChar* )NULL;
4203}
4204
4205/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
4206static UChar*
4207bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4208 const UChar* text, const UChar* text_end,
4209 const UChar* text_range)
4210{
4211 const UChar *s, *se, *t, *end;
4212 const UChar *tail;
4213 ptrdiff_t skip, tlen1;
4214 OnigEncoding enc = reg->enc;
4215 int case_fold_flag = reg->case_fold_flag;
4216
4217# ifdef ONIG_DEBUG_SEARCH
4218 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4219 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4220# endif
4221
4222 tail = target_end - 1;
4223 tlen1 = tail - target;
4224 end = text_range;
4225 if (end + tlen1 > text_end)
4226 end = text_end - tlen1;
4227
4228 s = text;
4229
4230 if (IS_NULL(reg->int_map)) {
4231 while (s < end) {
4232 se = s + tlen1;
4233 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4234 s, se + 1))
4235 return (UChar* )s;
4236 skip = reg->map[*se];
4237 t = s;
4238 do {
4239 s += enclen(reg->enc, s, end);
4240 } while ((s - t) < skip && s < end);
4241 }
4242 }
4243 else {
4244# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4245 while (s < end) {
4246 se = s + tlen1;
4247 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4248 s, se + 1))
4249 return (UChar* )s;
4250 skip = reg->int_map[*se];
4251 t = s;
4252 do {
4253 s += enclen(reg->enc, s, end);
4254 } while ((s - t) < skip && s < end);
4255 }
4256# endif
4257 }
4258
4259 return (UChar* )NULL;
4260}
4261
4262/* Boyer-Moore-Horspool search (ignore case) */
4263static UChar*
4264bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4265 const UChar* text, const UChar* text_end, const UChar* text_range)
4266{
4267 const UChar *s, *p, *end;
4268 const UChar *tail;
4269 OnigEncoding enc = reg->enc;
4270 int case_fold_flag = reg->case_fold_flag;
4271
4272# ifdef ONIG_DEBUG_SEARCH
4273 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4274 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4275# endif
4276
4277 end = text_range + (target_end - target) - 1;
4278 if (end > text_end)
4279 end = text_end;
4280
4281 tail = target_end - 1;
4282 s = text + (target_end - target) - 1;
4283 if (IS_NULL(reg->int_map)) {
4284 while (s < end) {
4285 p = s - (target_end - target) + 1;
4286 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4287 p, s + 1))
4288 return (UChar* )p;
4289 s += reg->map[*s];
4290 }
4291 }
4292 else { /* see int_map[] */
4293# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4294 while (s < end) {
4295 p = s - (target_end - target) + 1;
4296 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4297 p, s + 1))
4298 return (UChar* )p;
4299 s += reg->int_map[*s];
4300 }
4301# endif
4302 }
4303 return (UChar* )NULL;
4304}
4305
4306#else /* USE_SUNDAY_QUICK_SEARCH */
4307
4308/* Sunday's quick search applied to a multibyte string */
4309static UChar*
4310bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4311 const UChar* text, const UChar* text_end,
4312 const UChar* text_range)
4313{
4314 const UChar *s, *se, *t, *p, *end;
4315 const UChar *tail;
4316 ptrdiff_t skip, tlen1;
4317 OnigEncoding enc = reg->enc;
4318
4319# ifdef ONIG_DEBUG_SEARCH
4320 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4321 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4322# endif
4323
4324 tail = target_end - 1;
4325 tlen1 = tail - target;
4326 end = text_range;
4327 if (end + tlen1 > text_end)
4328 end = text_end - tlen1;
4329
4330 s = text;
4331
4332 if (IS_NULL(reg->int_map)) {
4333 while (s < end) {
4334 p = se = s + tlen1;
4335 t = tail;
4336 while (*p == *t) {
4337 if (t == target) return (UChar* )s;
4338 p--; t--;
4339 }
4340 if (s + 1 >= end) break;
4341 skip = reg->map[se[1]];
4342 t = s;
4343 do {
4344 s += enclen(enc, s, end);
4345 } while ((s - t) < skip && s < end);
4346 }
4347 }
4348 else {
4349# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4350 while (s < end) {
4351 p = se = s + tlen1;
4352 t = tail;
4353 while (*p == *t) {
4354 if (t == target) return (UChar* )s;
4355 p--; t--;
4356 }
4357 if (s + 1 >= end) break;
4358 skip = reg->int_map[se[1]];
4359 t = s;
4360 do {
4361 s += enclen(enc, s, end);
4362 } while ((s - t) < skip && s < end);
4363 }
4364# endif
4365 }
4366
4367 return (UChar* )NULL;
4368}
4369
4370/* Sunday's quick search */
4371static UChar*
4372bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4373 const UChar* text, const UChar* text_end, const UChar* text_range)
4374{
4375 const UChar *s, *t, *p, *end;
4376 const UChar *tail;
4377 ptrdiff_t tlen1;
4378
4379# ifdef ONIG_DEBUG_SEARCH
4380 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4381 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4382# endif
4383
4384 tail = target_end - 1;
4385 tlen1 = tail - target;
4386 end = text_range + tlen1;
4387 if (end > text_end)
4388 end = text_end;
4389
4390 s = text + tlen1;
4391 if (IS_NULL(reg->int_map)) {
4392 while (s < end) {
4393 p = s;
4394 t = tail;
4395 while (*p == *t) {
4396 if (t == target) return (UChar* )p;
4397 p--; t--;
4398 }
4399 if (s + 1 >= end) break;
4400 s += reg->map[s[1]];
4401 }
4402 }
4403 else { /* see int_map[] */
4404# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4405 while (s < end) {
4406 p = s;
4407 t = tail;
4408 while (*p == *t) {
4409 if (t == target) return (UChar* )p;
4410 p--; t--;
4411 }
4412 if (s + 1 >= end) break;
4413 s += reg->int_map[s[1]];
4414 }
4415# endif
4416 }
4417 return (UChar* )NULL;
4418}
4419
4420/* Sunday's quick search applied to a multibyte string (ignore case) */
4421static UChar*
4422bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4423 const UChar* text, const UChar* text_end,
4424 const UChar* text_range)
4425{
4426 const UChar *s, *se, *t, *end;
4427 const UChar *tail;
4428 ptrdiff_t skip, tlen1;
4429 OnigEncoding enc = reg->enc;
4430 int case_fold_flag = reg->case_fold_flag;
4431
4432# ifdef ONIG_DEBUG_SEARCH
4433 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4434 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4435# endif
4436
4437 tail = target_end - 1;
4438 tlen1 = tail - target;
4439 end = text_range;
4440 if (end + tlen1 > text_end)
4441 end = text_end - tlen1;
4442
4443 s = text;
4444
4445 if (IS_NULL(reg->int_map)) {
4446 while (s < end) {
4447 se = s + tlen1;
4448 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4449 s, se + 1))
4450 return (UChar* )s;
4451 if (s + 1 >= end) break;
4452 skip = reg->map[se[1]];
4453 t = s;
4454 do {
4455 s += enclen(enc, s, end);
4456 } while ((s - t) < skip && s < end);
4457 }
4458 }
4459 else {
4460# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4461 while (s < end) {
4462 se = s + tlen1;
4463 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4464 s, se + 1))
4465 return (UChar* )s;
4466 if (s + 1 >= end) break;
4467 skip = reg->int_map[se[1]];
4468 t = s;
4469 do {
4470 s += enclen(enc, s, end);
4471 } while ((s - t) < skip && s < end);
4472 }
4473# endif
4474 }
4475
4476 return (UChar* )NULL;
4477}
4478
4479/* Sunday's quick search (ignore case) */
4480static UChar*
4481bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4482 const UChar* text, const UChar* text_end, const UChar* text_range)
4483{
4484 const UChar *s, *p, *end;
4485 const UChar *tail;
4486 ptrdiff_t tlen1;
4487 OnigEncoding enc = reg->enc;
4488 int case_fold_flag = reg->case_fold_flag;
4489
4490# ifdef ONIG_DEBUG_SEARCH
4491 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4492 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4493# endif
4494
4495 tail = target_end - 1;
4496 tlen1 = tail - target;
4497 end = text_range + tlen1;
4498 if (end > text_end)
4499 end = text_end;
4500
4501 s = text + tlen1;
4502 if (IS_NULL(reg->int_map)) {
4503 while (s < end) {
4504 p = s - tlen1;
4505 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4506 p, s + 1))
4507 return (UChar* )p;
4508 if (s + 1 >= end) break;
4509 s += reg->map[s[1]];
4510 }
4511 }
4512 else { /* see int_map[] */
4513# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4514 while (s < end) {
4515 p = s - tlen1;
4516 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4517 p, s + 1))
4518 return (UChar* )p;
4519 if (s + 1 >= end) break;
4520 s += reg->int_map[s[1]];
4521 }
4522# endif
4523 }
4524 return (UChar* )NULL;
4525}
4526#endif /* USE_SUNDAY_QUICK_SEARCH */
4527
4528#ifdef USE_INT_MAP_BACKWARD
4529static int
4530set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4531 int** skip)
4532{
4533 int i, len;
4534
4535 if (IS_NULL(*skip)) {
4536 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4537 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4538 }
4539
4540 len = (int )(end - s);
4541 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4542 (*skip)[i] = len;
4543
4544 for (i = len - 1; i > 0; i--)
4545 (*skip)[s[i]] = i;
4546
4547 return 0;
4548}
4549
4550static UChar*
4551bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4552 const UChar* text, const UChar* adjust_text,
4553 const UChar* text_end, const UChar* text_start)
4554{
4555 const UChar *s, *t, *p;
4556
4557 s = text_end - (target_end - target);
4558 if (text_start < s)
4559 s = text_start;
4560 else
4561 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4562
4563 while (s >= text) {
4564 p = s;
4565 t = target;
4566 while (t < target_end && *p == *t) {
4567 p++; t++;
4568 }
4569 if (t == target_end)
4570 return (UChar* )s;
4571
4572 s -= reg->int_map_backward[*s];
4573 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4574 }
4575
4576 return (UChar* )NULL;
4577}
4578#endif
4579
4580static UChar*
4581map_search(OnigEncoding enc, UChar map[],
4582 const UChar* text, const UChar* text_range, const UChar* text_end)
4583{
4584 const UChar *s = text;
4585
4586 while (s < text_range) {
4587 if (map[*s]) return (UChar* )s;
4588
4589 s += enclen(enc, s, text_end);
4590 }
4591 return (UChar* )NULL;
4592}
4593
4594static UChar*
4595map_search_backward(OnigEncoding enc, UChar map[],
4596 const UChar* text, const UChar* adjust_text,
4597 const UChar* text_start, const UChar* text_end)
4598{
4599 const UChar *s = text_start;
4600
4601 while (s >= text) {
4602 if (map[*s]) return (UChar* )s;
4603
4604 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4605 }
4606 return (UChar* )NULL;
4607}
4608
4609extern OnigPosition
4610onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4611 OnigOptionType option)
4612{
4613 ptrdiff_t r;
4614 UChar *prev;
4615 OnigMatchArg msa;
4616
4617 MATCH_ARG_INIT(msa, option, region, at, at);
4618#ifdef USE_COMBINATION_EXPLOSION_CHECK
4619 {
4620 ptrdiff_t offset = at - str;
4621 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4622 }
4623#endif
4624
4625 if (region) {
4626 r = onig_region_resize_clear(region, reg->num_mem + 1);
4627 }
4628 else
4629 r = 0;
4630
4631 if (r == 0) {
4632 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4633 r = match_at(reg, str, end,
4634#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4635 end,
4636#endif
4637 at, prev, &msa);
4638 }
4639
4640 MATCH_ARG_FREE(msa);
4641 return r;
4642}
4643
4644static int
4645forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4646 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4647{
4648 UChar *p, *pprev = (UChar* )NULL;
4649
4650#ifdef ONIG_DEBUG_SEARCH
4651 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4652 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4653#endif
4654
4655 p = s;
4656 if (reg->dmin > 0) {
4657 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4658 p += reg->dmin;
4659 }
4660 else {
4661 UChar *q = p + reg->dmin;
4662
4663 if (q >= end) return 0; /* fail */
4664 while (p < q) p += enclen(reg->enc, p, end);
4665 }
4666 }
4667
4668 retry:
4669 switch (reg->optimize) {
4670 case ONIG_OPTIMIZE_EXACT:
4671 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4672 break;
4673 case ONIG_OPTIMIZE_EXACT_IC:
4674 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4675 reg->exact, reg->exact_end, p, end, range);
4676 break;
4677
4678 case ONIG_OPTIMIZE_EXACT_BM:
4679 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4680 break;
4681
4682 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4683 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4684 break;
4685
4686 case ONIG_OPTIMIZE_EXACT_BM_IC:
4687 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4688 break;
4689
4690 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4691 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4692 break;
4693
4694 case ONIG_OPTIMIZE_MAP:
4695 p = map_search(reg->enc, reg->map, p, range, end);
4696 break;
4697 }
4698
4699 if (p && p < range) {
4700 if (p - reg->dmin < s) {
4701 retry_gate:
4702 pprev = p;
4703 p += enclen(reg->enc, p, end);
4704 goto retry;
4705 }
4706
4707 if (reg->sub_anchor) {
4708 UChar* prev;
4709
4710 switch (reg->sub_anchor) {
4711 case ANCHOR_BEGIN_LINE:
4712 if (!ON_STR_BEGIN(p)) {
4713 prev = onigenc_get_prev_char_head(reg->enc,
4714 (pprev ? pprev : str), p, end);
4715 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4716 goto retry_gate;
4717 }
4718 break;
4719
4720 case ANCHOR_END_LINE:
4721 if (ON_STR_END(p)) {
4722#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4723 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4724 (pprev ? pprev : str), p);
4725 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
4726 goto retry_gate;
4727#endif
4728 }
4729 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
4730 goto retry_gate;
4731 break;
4732 }
4733 }
4734
4735 if (reg->dmax == 0) {
4736 *low = p;
4737 if (low_prev) {
4738 if (*low > s)
4739 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
4740 else
4741 *low_prev = onigenc_get_prev_char_head(reg->enc,
4742 (pprev ? pprev : str), p, end);
4743 }
4744 }
4745 else {
4746 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4747 if (p < str + reg->dmax) {
4748 *low = (UChar* )str;
4749 if (low_prev)
4750 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4751 }
4752 else {
4753 *low = p - reg->dmax;
4754 if (*low > s) {
4755 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4756 *low, end, (const UChar** )low_prev);
4757 if (low_prev && IS_NULL(*low_prev))
4758 *low_prev = onigenc_get_prev_char_head(reg->enc,
4759 (pprev ? pprev : s), *low, end);
4760 }
4761 else {
4762 if (low_prev)
4763 *low_prev = onigenc_get_prev_char_head(reg->enc,
4764 (pprev ? pprev : str), *low, end);
4765 }
4766 }
4767 }
4768 }
4769 /* no needs to adjust *high, *high is used as range check only */
4770 *high = p - reg->dmin;
4771
4772#ifdef ONIG_DEBUG_SEARCH
4773 fprintf(stderr,
4774 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4775 *low - str, *high - str, reg->dmin, reg->dmax);
4776#endif
4777 return 1; /* success */
4778 }
4779
4780 return 0; /* fail */
4781}
4782
4783#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4784
4785static int
4786backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4787 UChar* s, const UChar* range, UChar* adjrange,
4788 UChar** low, UChar** high)
4789{
4790 UChar *p;
4791
4792 range += reg->dmin;
4793 p = s;
4794
4795 retry:
4796 switch (reg->optimize) {
4797 case ONIG_OPTIMIZE_EXACT:
4798 exact_method:
4799 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4800 range, adjrange, end, p);
4801 break;
4802
4803 case ONIG_OPTIMIZE_EXACT_IC:
4804 case ONIG_OPTIMIZE_EXACT_BM_IC:
4805 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4806 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4807 reg->exact, reg->exact_end,
4808 range, adjrange, end, p);
4809 break;
4810
4811 case ONIG_OPTIMIZE_EXACT_BM:
4812 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4813#ifdef USE_INT_MAP_BACKWARD
4814 if (IS_NULL(reg->int_map_backward)) {
4815 int r;
4816 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4817 goto exact_method;
4818
4819 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4820 &(reg->int_map_backward));
4821 if (r) return r;
4822 }
4823 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4824 end, p);
4825#else
4826 goto exact_method;
4827#endif
4828 break;
4829
4830 case ONIG_OPTIMIZE_MAP:
4831 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4832 break;
4833 }
4834
4835 if (p) {
4836 if (reg->sub_anchor) {
4837 UChar* prev;
4838
4839 switch (reg->sub_anchor) {
4840 case ANCHOR_BEGIN_LINE:
4841 if (!ON_STR_BEGIN(p)) {
4842 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4843 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4844 p = prev;
4845 goto retry;
4846 }
4847 }
4848 break;
4849
4850 case ANCHOR_END_LINE:
4851 if (ON_STR_END(p)) {
4852#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4853 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4854 if (IS_NULL(prev)) goto fail;
4855 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4856 p = prev;
4857 goto retry;
4858 }
4859#endif
4860 }
4861 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4862 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4863 if (IS_NULL(p)) goto fail;
4864 goto retry;
4865 }
4866 break;
4867 }
4868 }
4869
4870 /* no needs to adjust *high, *high is used as range check only */
4871 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4872 *low = p - reg->dmax;
4873 *high = p - reg->dmin;
4874 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4875 }
4876
4877#ifdef ONIG_DEBUG_SEARCH
4878 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4879 (int )(*low - str), (int )(*high - str));
4880#endif
4881 return 1; /* success */
4882 }
4883
4884 fail:
4885#ifdef ONIG_DEBUG_SEARCH
4886 fprintf(stderr, "backward_search_range: fail.\n");
4887#endif
4888 return 0; /* fail */
4889}
4890
4891
4892extern OnigPosition
4893onig_search(regex_t* reg, const UChar* str, const UChar* end,
4894 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4895{
4896 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4897}
4898
4899extern OnigPosition
4900onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
4901 const UChar* global_pos,
4902 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4903{
4904 ptrdiff_t r;
4905 UChar *s, *prev;
4906 OnigMatchArg msa;
4907#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4908 const UChar *orig_start = start;
4909 const UChar *orig_range = range;
4910#endif
4911
4912#ifdef ONIG_DEBUG_SEARCH
4913 fprintf(stderr,
4914 "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
4915 (uintptr_t )str, str, end - str, start - str, range - str);
4916#endif
4917
4918 if (region) {
4919 r = onig_region_resize_clear(region, reg->num_mem + 1);
4920 if (r) goto finish_no_msa;
4921 }
4922
4923 if (start > end || start < str) goto mismatch_no_msa;
4924
4925
4926#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4927# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4928# define MATCH_AND_RETURN_CHECK(upper_range) \
4929 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4930 if (r != ONIG_MISMATCH) {\
4931 if (r >= 0) {\
4932 if (! IS_FIND_LONGEST(reg->options)) {\
4933 goto match;\
4934 }\
4935 }\
4936 else goto finish; /* error */ \
4937 }
4938# else
4939# define MATCH_AND_RETURN_CHECK(upper_range) \
4940 r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
4941 if (r != ONIG_MISMATCH) {\
4942 if (r >= 0) {\
4943 goto match;\
4944 }\
4945 else goto finish; /* error */ \
4946 }
4947# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4948#else
4949# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
4950# define MATCH_AND_RETURN_CHECK(none) \
4951 r = match_at(reg, str, end, s, prev, &msa);\
4952 if (r != ONIG_MISMATCH) {\
4953 if (r >= 0) {\
4954 if (! IS_FIND_LONGEST(reg->options)) {\
4955 goto match;\
4956 }\
4957 }\
4958 else goto finish; /* error */ \
4959 }
4960# else
4961# define MATCH_AND_RETURN_CHECK(none) \
4962 r = match_at(reg, str, end, s, prev, &msa);\
4963 if (r != ONIG_MISMATCH) {\
4964 if (r >= 0) {\
4965 goto match;\
4966 }\
4967 else goto finish; /* error */ \
4968 }
4969# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
4970#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
4971
4972
4973 /* anchor optimize: resume search range */
4974 if (reg->anchor != 0 && str < end) {
4975 UChar *min_semi_end, *max_semi_end;
4976
4977 if (reg->anchor & ANCHOR_BEGIN_POSITION) {
4978 /* search start-position only */
4979 begin_position:
4980 if (range > start)
4981 {
4982 if (global_pos > start)
4983 {
4984 if (global_pos < range)
4985 range = global_pos + 1;
4986 }
4987 else
4988 range = start + 1;
4989 }
4990 else
4991 range = start;
4992 }
4993 else if (reg->anchor & ANCHOR_BEGIN_BUF) {
4994 /* search str-position only */
4995 if (range > start) {
4996 if (start != str) goto mismatch_no_msa;
4997 range = str + 1;
4998 }
4999 else {
5000 if (range <= str) {
5001 start = str;
5002 range = str;
5003 }
5004 else
5005 goto mismatch_no_msa;
5006 }
5007 }
5008 else if (reg->anchor & ANCHOR_END_BUF) {
5009 min_semi_end = max_semi_end = (UChar* )end;
5010
5011 end_buf:
5012 if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
5013 goto mismatch_no_msa;
5014
5015 if (range > start) {
5016 if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
5017 start = min_semi_end - reg->anchor_dmax;
5018 if (start < end)
5019 start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
5020 }
5021 if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
5022 range = max_semi_end - reg->anchor_dmin + 1;
5023 }
5024
5025 if (start > range) goto mismatch_no_msa;
5026 /* If start == range, match with empty at end.
5027 Backward search is used. */
5028 }
5029 else {
5030 if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
5031 range = min_semi_end - reg->anchor_dmax;
5032 }
5033 if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
5034 start = max_semi_end - reg->anchor_dmin;
5035 start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
5036 }
5037 if (range > start) goto mismatch_no_msa;
5038 }
5039 }
5040 else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
5041 UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);
5042
5043 max_semi_end = (UChar* )end;
5044 if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
5045 min_semi_end = pre_end;
5046
5047#ifdef USE_CRNL_AS_LINE_TERMINATOR
5048 pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
5049 if (IS_NOT_NULL(pre_end) &&
5050 IS_NEWLINE_CRLF(reg->options) &&
5051 ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
5052 min_semi_end = pre_end;
5053 }
5054#endif
5055 if (min_semi_end > str && start <= min_semi_end) {
5056 goto end_buf;
5057 }
5058 }
5059 else {
5060 min_semi_end = (UChar* )end;
5061 goto end_buf;
5062 }
5063 }
5064 else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
5065 goto begin_position;
5066 }
5067 }
5068 else if (str == end) { /* empty string */
5069 static const UChar address_for_empty_string[] = "";
5070
5071#ifdef ONIG_DEBUG_SEARCH
5072 fprintf(stderr, "onig_search: empty string.\n");
5073#endif
5074
5075 if (reg->threshold_len == 0) {
5076 start = end = str = address_for_empty_string;
5077 s = (UChar* )start;
5078 prev = (UChar* )NULL;
5079
5080 MATCH_ARG_INIT(msa, option, region, start, start);
5081#ifdef USE_COMBINATION_EXPLOSION_CHECK
5082 msa.state_check_buff = (void* )0;
5083 msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
5084#endif
5085 MATCH_AND_RETURN_CHECK(end);
5086 goto mismatch;
5087 }
5088 goto mismatch_no_msa;
5089 }
5090
5091#ifdef ONIG_DEBUG_SEARCH
5092 fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
5093 (int )(end - str), (int )(start - str), (int )(range - str));
5094#endif
5095
5096 MATCH_ARG_INIT(msa, option, region, start, global_pos);
5097#ifdef USE_COMBINATION_EXPLOSION_CHECK
5098 {
5099 ptrdiff_t offset = (MIN(start, range) - str);
5100 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
5101 }
5102#endif
5103
5104 s = (UChar* )start;
5105 if (range > start) { /* forward search */
5106 if (s > str)
5107 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5108 else
5109 prev = (UChar* )NULL;
5110
5111 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5112 UChar *sch_range, *low, *high, *low_prev;
5113
5114 sch_range = (UChar* )range;
5115 if (reg->dmax != 0) {
5116 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5117 sch_range = (UChar* )end;
5118 else {
5119 sch_range += reg->dmax;
5120 if (sch_range > end) sch_range = (UChar* )end;
5121 }
5122 }
5123
5124 if ((end - start) < reg->threshold_len)
5125 goto mismatch;
5126
5127 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
5128 do {
5129 if (! forward_search_range(reg, str, end, s, sch_range,
5130 &low, &high, &low_prev)) goto mismatch;
5131 if (s < low) {
5132 s = low;
5133 prev = low_prev;
5134 }
5135 while (s <= high) {
5136 MATCH_AND_RETURN_CHECK(orig_range);
5137 prev = s;
5138 s += enclen(reg->enc, s, end);
5139 }
5140 } while (s < range);
5141 goto mismatch;
5142 }
5143 else { /* check only. */
5144 if (! forward_search_range(reg, str, end, s, sch_range,
5145 &low, &high, (UChar** )NULL)) goto mismatch;
5146
5147 if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
5148 do {
5149 MATCH_AND_RETURN_CHECK(orig_range);
5150 prev = s;
5151 s += enclen(reg->enc, s, end);
5152
5153 if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
5154 while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
5155 && s < range) {
5156 prev = s;
5157 s += enclen(reg->enc, s, end);
5158 }
5159 }
5160 } while (s < range);
5161 goto mismatch;
5162 }
5163 }
5164 }
5165
5166 do {
5167 MATCH_AND_RETURN_CHECK(orig_range);
5168 prev = s;
5169 s += enclen(reg->enc, s, end);
5170 } while (s < range);
5171
5172 if (s == range) { /* because empty match with /$/. */
5173 MATCH_AND_RETURN_CHECK(orig_range);
5174 }
5175 }
5176 else { /* backward search */
5177 if (reg->optimize != ONIG_OPTIMIZE_NONE) {
5178 UChar *low, *high, *adjrange, *sch_start;
5179
5180 if (range < end)
5181 adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
5182 else
5183 adjrange = (UChar* )end;
5184
5185 if (reg->dmax != ONIG_INFINITE_DISTANCE &&
5186 (end - range) >= reg->threshold_len) {
5187 do {
5188 sch_start = s + reg->dmax;
5189 if (sch_start > end) sch_start = (UChar* )end;
5190 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5191 &low, &high) <= 0)
5192 goto mismatch;
5193
5194 if (s > high)
5195 s = high;
5196
5197 while (s >= low) {
5198 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5199 MATCH_AND_RETURN_CHECK(orig_start);
5200 s = prev;
5201 }
5202 } while (s >= range);
5203 goto mismatch;
5204 }
5205 else { /* check only. */
5206 if ((end - range) < reg->threshold_len) goto mismatch;
5207
5208 sch_start = s;
5209 if (reg->dmax != 0) {
5210 if (reg->dmax == ONIG_INFINITE_DISTANCE)
5211 sch_start = (UChar* )end;
5212 else {
5213 sch_start += reg->dmax;
5214 if (sch_start > end) sch_start = (UChar* )end;
5215 else
5216 sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
5217 start, sch_start, end);
5218 }
5219 }
5220 if (backward_search_range(reg, str, end, sch_start, range, adjrange,
5221 &low, &high) <= 0) goto mismatch;
5222 }
5223 }
5224
5225 do {
5226 prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
5227 MATCH_AND_RETURN_CHECK(orig_start);
5228 s = prev;
5229 } while (s >= range);
5230 }
5231
5232 mismatch:
5233#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
5234 if (IS_FIND_LONGEST(reg->options)) {
5235 if (msa.best_len >= 0) {
5236 s = msa.best_s;
5237 goto match;
5238 }
5239 }
5240#endif
5241 r = ONIG_MISMATCH;
5242
5243 finish:
5244 MATCH_ARG_FREE(msa);
5245
5246 /* If result is mismatch and no FIND_NOT_EMPTY option,
5247 then the region is not set in match_at(). */
5248 if (IS_FIND_NOT_EMPTY(reg->options) && region) {
5249 onig_region_clear(region);
5250 }
5251
5252#ifdef ONIG_DEBUG
5253 if (r != ONIG_MISMATCH)
5254 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5255#endif
5256 return r;
5257
5258 mismatch_no_msa:
5259 r = ONIG_MISMATCH;
5260 finish_no_msa:
5261#ifdef ONIG_DEBUG
5262 if (r != ONIG_MISMATCH)
5263 fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
5264#endif
5265 return r;
5266
5267 match:
5268 MATCH_ARG_FREE(msa);
5269 return s - str;
5270}
5271
5272extern OnigPosition
5273onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5274 OnigRegion* region, OnigOptionType option,
5275 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5276 void* callback_arg)
5277{
5278 OnigPosition r;
5279 OnigPosition n;
5280 int rs;
5281 const UChar* start;
5282
5283 n = 0;
5284 start = str;
5285 while (1) {
5286 r = onig_search(reg, str, end, start, end, region, option);
5287 if (r >= 0) {
5288 rs = scan_callback(n, r, region, callback_arg);
5289 n++;
5290 if (rs != 0)
5291 return rs;
5292
5293 if (region->end[0] == start - str) {
5294 if (start >= end) break;
5295 start += enclen(reg->enc, start, end);
5296 }
5297 else
5298 start = str + region->end[0];
5299
5300 if (start > end)
5301 break;
5302 }
5303 else if (r == ONIG_MISMATCH) {
5304 break;
5305 }
5306 else { /* error */
5307 return r;
5308 }
5309 }
5310
5311 return n;
5312}
5313
5314extern OnigEncoding
5315onig_get_encoding(const regex_t* reg)
5316{
5317 return reg->enc;
5318}
5319
5320extern OnigOptionType
5321onig_get_options(const regex_t* reg)
5322{
5323 return reg->options;
5324}
5325
5326extern OnigCaseFoldType
5327onig_get_case_fold_flag(const regex_t* reg)
5328{
5329 return reg->case_fold_flag;
5330}
5331
5332extern const OnigSyntaxType*
5333onig_get_syntax(const regex_t* reg)
5334{
5335 return reg->syntax;
5336}
5337
5338extern int
5339onig_number_of_captures(const regex_t* reg)
5340{
5341 return reg->num_mem;
5342}
5343
5344extern int
5345onig_number_of_capture_histories(const regex_t* reg)
5346{
5347#ifdef USE_CAPTURE_HISTORY
5348 int i, n;
5349
5350 n = 0;
5351 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5352 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5353 n++;
5354 }
5355 return n;
5356#else
5357 return 0;
5358#endif
5359}
5360
5361extern void
5362onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5363{
5364 *to = *from;
5365}
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition: defines.h:89
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition: xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
Definition: win32.h:698