/* regex.c (18331B) */
1 /* $Id$ */ 2 /* 3 * regex: Regular expression pattern match library 4 * 5 * by A.ITO, December 1989 6 * Revised by A.ITO, January 2002 7 */ 8 9 #ifdef REGEX_DEBUG 10 #include <sys/types.h> 11 #include <malloc.h> 12 #endif /* REGEX_DEBUG */ 13 #include <stdio.h> 14 #include <stdlib.h> 15 #include <string.h> 16 #include <gc.h> 17 #include "config.h" 18 #ifdef USE_M17N 19 #include "wc.h" 20 #include "wtf.h" 21 #ifdef USE_UNICODE 22 #include "ucs.h" 23 #endif 24 #endif 25 #include "regex.h" 26 #include "config.h" 27 #include "myctype.h" 28 29 #ifndef NULL 30 #define NULL 0 31 #endif /* not NULL */ 32 33 #define RE_ITER_LIMIT 65535 34 35 #define RE_MATCHMODE 0x07 36 #define RE_NORMAL 0x00 37 #define RE_ANY 0x01 38 #define RE_WHICH 0x02 39 #define RE_EXCEPT 0x03 40 #define RE_SUBREGEX 0x04 41 #define RE_BEGIN 0x05 42 #define RE_END 0x06 43 #define RE_ENDMARK 0x07 44 45 #define RE_OPT 0x08 46 #define RE_ANYTIME 0x10 47 #define RE_IGNCASE 0x40 48 49 #define RE_MODE(x) ((x)->mode&RE_MATCHMODE) 50 #define RE_SET_MODE(x,v) ((x)->mode = (((x)->mode&~RE_MATCHMODE)|((v)&RE_MATCHMODE))) 51 52 #ifdef REGEX_DEBUG 53 void debugre(regexchar *); 54 char *lc2c(longchar *, int); 55 int verbose; 56 #endif /* REGEX_DEBUG */ 57 58 #ifdef USE_M17N 59 #define get_mclen(c) wtf_len1((wc_uchar *)(c)) 60 #else 61 #define get_mclen(c) 1 62 #endif 63 64 #ifndef TOLOWER 65 #include <ctype.h> 66 #define TOLOWER(x) tolower(x) 67 #define TOUPPER(x) toupper(x) 68 #endif 69 70 #define RE_TYPE_END 0 71 #define RE_TYPE_CHAR 1 72 #define RE_TYPE_WCHAR_T 2 73 #define RE_WHICH_RANGE 3 74 #define RE_TYPE_SYMBOL 4 75 76 static longchar 77 set_longchar(char *str) 78 { 79 unsigned char *p = (unsigned char *)str; 80 longchar r; 81 82 #ifdef USE_M17N 83 if (*p & 0x80) { 84 r.wch = wtf_parse1(&p); 85 if (r.wch.ccs == WC_CCS_SPECIAL || r.wch.ccs == WC_CCS_SPECIAL_W) { 86 r.type = RE_TYPE_SYMBOL; 87 return r; 88 } 89 #ifdef USE_UNICODE 90 if (WC_CCS_IS_UNICODE(r.wch.ccs)) { 91 if (WC_CCS_SET(r.wch.ccs) == 
WC_CCS_UCS_TAG) 92 r.wch.code = wc_ucs_tag_to_ucs(r.wch.code); 93 r.wch.ccs = WC_CCS_UCS4; 94 } 95 else 96 #endif 97 r.wch.ccs = WC_CCS_SET(r.wch.ccs); 98 r.type = RE_TYPE_WCHAR_T; 99 return r; 100 } 101 #endif 102 r.ch = *p; 103 r.type = RE_TYPE_CHAR; 104 return r; 105 } 106 107 static Regex DefaultRegex; 108 #define CompiledRegex DefaultRegex.re 109 #define Cstorage DefaultRegex.storage 110 111 static int regmatch(regexchar *, char *, char *, int, char **); 112 static int regmatch1(regexchar *, longchar *); 113 static int matchWhich(longchar *, longchar *, int); 114 static int match_longchar(longchar *, longchar *, int); 115 static int match_range_longchar(longchar *, longchar *, longchar *, int); 116 117 /* 118 * regexCompile: compile regular expression 119 */ 120 char * 121 regexCompile(char *ex, int igncase) 122 { 123 char *msg; 124 newRegex(ex, igncase, &DefaultRegex, &msg); 125 return msg; 126 } 127 128 static Regex * 129 newRegex0(char **ex, int igncase, Regex *regex, char **msg, int level) 130 { 131 char *p; 132 longchar *r; 133 regexchar *re; 134 int m; 135 longchar *st_ptr; 136 137 if (regex == NULL) 138 regex = (Regex *)GC_malloc(sizeof(Regex)); 139 regex->alt_regex = NULL; 140 re = regex->re; 141 st_ptr = regex->storage; 142 for (p = *ex; *p != '\0'; p++) { 143 re->mode = 0; 144 switch (*p) { 145 case '.': 146 re->p.pattern = NULL; 147 RE_SET_MODE(re, RE_ANY); 148 re++; 149 break; 150 case '$': 151 re->p.pattern = NULL; 152 RE_SET_MODE(re, RE_END); 153 re++; 154 break; 155 case '^': 156 re->p.pattern = NULL; 157 RE_SET_MODE(re, RE_BEGIN); 158 re++; 159 break; 160 case '+': 161 if (re == regex->re || 162 (RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) { 163 if (msg) 164 *msg = "Invalid regular expression"; 165 return NULL; 166 } 167 *re = *(re - 1); 168 re->mode |= RE_ANYTIME; 169 re++; 170 break; 171 case '*': 172 if (re == regex->re || 173 (RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) { 174 if (msg) 175 *msg = "Invalid 
regular expression"; 176 return NULL; 177 } 178 (re - 1)->mode |= RE_ANYTIME; 179 break; 180 case '?': 181 if (re == regex->re || 182 (RE_MODE(re - 1) != RE_ANY && (re - 1)->p.pattern == NULL)) { 183 if (msg) 184 *msg = "Invalid regular expression"; 185 return NULL; 186 } 187 (re - 1)->mode |= RE_OPT; 188 break; 189 case '[': 190 r = st_ptr; 191 if (*++p == '^') { 192 p++; 193 m = RE_EXCEPT; 194 } 195 else 196 m = RE_WHICH; 197 if (*p == '-' || *p == ']') 198 *(st_ptr++) = set_longchar(p); 199 while (*p != ']') { 200 if (*p == '\\') { 201 p++; 202 *(st_ptr++) = set_longchar(p); 203 p += get_mclen(p); 204 } 205 else if (*p == '-' && *(p + 1) != ']') { 206 (st_ptr++)->type = RE_WHICH_RANGE; 207 p++; 208 } 209 else if (*p == '\0') { 210 if (msg) 211 *msg = "Missing ]"; 212 return NULL; 213 } 214 else { 215 *(st_ptr++) = set_longchar(p); 216 p += get_mclen(p); 217 } 218 if (st_ptr >= ®ex->storage[STORAGE_MAX]) { 219 if (msg) 220 *msg = "Regular expression too long"; 221 return NULL; 222 } 223 } 224 (st_ptr++)->type = RE_TYPE_END; 225 re->p.pattern = r; 226 RE_SET_MODE(re, m); 227 if (igncase) 228 re->mode |= RE_IGNCASE; 229 re++; 230 break; 231 case '|': 232 RE_SET_MODE(re, RE_ENDMARK); 233 re++; 234 p++; 235 regex->alt_regex = newRegex0(&p, igncase, NULL, msg, level); 236 if (regex->alt_regex == NULL) 237 return NULL; 238 *ex = p; 239 return regex; 240 case '(': 241 RE_SET_MODE(re, RE_SUBREGEX); 242 p++; 243 re->p.sub = newRegex0(&p, igncase, NULL, msg, level + 1); 244 if (re->p.sub == NULL) 245 return NULL; 246 re++; 247 break; 248 case ')': 249 if (level == 0) { 250 if (msg) 251 *msg = "Too many ')'"; 252 return NULL; 253 } 254 RE_SET_MODE(re, RE_ENDMARK); 255 re++; 256 *ex = p; 257 return regex; 258 case '\\': 259 p++; 260 default: 261 *(st_ptr) = set_longchar(p); 262 p += get_mclen(p) - 1; 263 re->p.pattern = st_ptr; 264 st_ptr++; 265 RE_SET_MODE(re, RE_NORMAL); 266 if (igncase) 267 re->mode |= RE_IGNCASE; 268 re++; 269 } 270 if (st_ptr >= 
®ex->storage[STORAGE_MAX] || 271 re >= ®ex->re[REGEX_MAX]) { 272 if (msg) 273 *msg = "Regular expression too long"; 274 return NULL; 275 } 276 } 277 RE_SET_MODE(re, RE_ENDMARK); 278 if (msg) 279 *msg = NULL; 280 *ex = p; 281 return regex; 282 } 283 284 Regex * 285 newRegex(char *ex, int igncase, Regex *regex, char **msg) 286 { 287 return newRegex0(&ex, igncase, regex, msg, 0); 288 } 289 290 /* 291 * regexMatch: match regular expression 292 */ 293 int 294 regexMatch(char *str, int len, int firstp) 295 { 296 return RegexMatch(&DefaultRegex, str, len, firstp); 297 } 298 299 int 300 RegexMatch(Regex *re, char *str, int len, int firstp) 301 { 302 char *p, *ep; 303 char *lpos; 304 Regex *r; 305 306 if (str == NULL) 307 return 0; 308 if (len < 0) 309 len = strlen(str); 310 re->position = NULL; 311 ep = str + len; 312 for (p = str; p <= ep; p++) { 313 lpos = NULL; 314 re->lposition = NULL; 315 for (r = re; r != NULL; r = r->alt_regex) { 316 switch (regmatch(r->re, p, ep, firstp && (p == str), &lpos)) { 317 case 1: /* matched */ 318 re->position = p; 319 if (re->lposition == NULL || re->lposition < lpos) 320 re->lposition = lpos; 321 break; 322 case -1: /* error */ 323 re->position = NULL; 324 return -1; 325 } 326 } 327 if (re->lposition != NULL) { 328 /* matched */ 329 return 1; 330 } 331 p += get_mclen(p) - 1; 332 } 333 return 0; 334 } 335 336 /* 337 * matchedPosition: last matched position 338 */ 339 void 340 MatchedPosition(Regex *re, char **first, char **last) 341 { 342 *first = re->position; 343 *last = re->lposition; 344 } 345 346 void 347 matchedPosition(char **first, char **last) 348 { 349 *first = DefaultRegex.position; 350 *last = DefaultRegex.lposition; 351 } 352 353 /* 354 * Intermal routines 355 */ 356 357 struct MatchingContext1 { 358 int label; 359 regexchar *re; 360 char *lastpos; 361 char *str; 362 int iter_limit; 363 int n_any; 364 int firstp; 365 char *end_p; 366 Regex *sub_regex; 367 struct MatchingContext1 *sub_ctx; 368 struct MatchingContext2 *ctx2; 
369 }; 370 371 struct MatchingContext2 { 372 int label; 373 Regex *regex; 374 char *lastpos; 375 struct MatchingContext1 *ctx; 376 struct MatchingContext2 *ctx2; 377 char *str; 378 int n_any; 379 int firstp; 380 }; 381 382 383 #define YIELD(retval,context,lnum) (context)->label = lnum; return (retval); label##lnum: 384 385 static int regmatch_iter(struct MatchingContext1 *, 386 regexchar *, char *, char *, int); 387 388 static int 389 regmatch_sub_anytime(struct MatchingContext2 *c, Regex *regex, 390 regexchar * pat2, 391 char *str, char *end_p, int iter_limit, int firstp) 392 { 393 switch (c->label) { 394 case 1: 395 goto label1; 396 case 2: 397 goto label2; 398 case 3: 399 goto label3; 400 } 401 c->ctx = GC_malloc(sizeof(struct MatchingContext1)); 402 c->ctx2 = GC_malloc(sizeof(struct MatchingContext2)); 403 c->ctx->label = 0; 404 c->regex = regex; 405 c->n_any = 0; 406 c->str = str; 407 c->firstp = firstp; 408 for (;;) { 409 c->ctx->label = 0; 410 while (regmatch_iter(c->ctx, c->regex->re, c->str, end_p, c->firstp)) { 411 c->n_any = c->ctx->lastpos - c->str; 412 if (c->n_any <= 0) 413 continue; 414 c->firstp = 0; 415 if (RE_MODE(pat2) == RE_ENDMARK) { 416 c->lastpos = c->str + c->n_any; 417 YIELD(1, c, 1); 418 } 419 else if (regmatch(pat2, c->str + c->n_any, end_p, 420 c->firstp, &c->lastpos) == 1) { 421 YIELD(1, c, 2); 422 } 423 if (iter_limit == 1) 424 continue; 425 c->ctx2->label = 0; 426 while (regmatch_sub_anytime(c->ctx2, regex, pat2, 427 c->str + c->n_any, end_p, 428 iter_limit - 1, c->firstp)) { 429 430 c->lastpos = c->ctx2->lastpos; 431 YIELD(1, c, 3); 432 } 433 } 434 if (c->regex->alt_regex == NULL) 435 break; 436 c->regex = c->regex->alt_regex; 437 } 438 return 0; 439 } 440 441 static int 442 regmatch_iter(struct MatchingContext1 *c, 443 regexchar * re, char *str, char *end_p, int firstp) 444 { 445 switch (c->label) { 446 case 1: 447 goto label1; 448 case 2: 449 goto label2; 450 case 3: 451 goto label3; 452 case 4: 453 goto label4; 454 case 5: 455 
goto label5; 456 case 6: 457 goto label6; 458 case 7: 459 goto label7; 460 } 461 if (RE_MODE(re) == RE_ENDMARK) 462 return 0; 463 c->re = re; 464 c->firstp = firstp; 465 c->str = str; 466 c->end_p = end_p; 467 c->sub_ctx = NULL; 468 c->lastpos = NULL; 469 while (RE_MODE(c->re) != RE_ENDMARK) { 470 if (c->re->mode & (RE_ANYTIME | RE_OPT)) { 471 if (c->re->mode & RE_ANYTIME) 472 c->iter_limit = RE_ITER_LIMIT; 473 else 474 c->iter_limit = 1; 475 c->n_any = -1; 476 while (c->n_any < c->iter_limit) { 477 if (c->str + c->n_any >= c->end_p) { 478 return 0; 479 } 480 if (c->n_any >= 0) { 481 if (RE_MODE(c->re) == RE_SUBREGEX) { 482 c->ctx2 = GC_malloc(sizeof(struct MatchingContext2)); 483 c->ctx2->label = 0; 484 while (regmatch_sub_anytime(c->ctx2, 485 c->re->p.sub, 486 c->re + 1, 487 c->str + c->n_any, 488 c->end_p, 489 c->iter_limit, 490 c->firstp)) { 491 c->n_any = c->ctx2->lastpos - c->str; 492 c->lastpos = c->ctx2->lastpos; 493 YIELD(1, c, 1); 494 } 495 return 0; 496 } 497 else { 498 longchar k; 499 k = set_longchar(c->str + c->n_any); 500 if (regmatch1(c->re, &k)) { 501 c->n_any += get_mclen(c->str + c->n_any); 502 } 503 else { 504 return 0; 505 } 506 c->firstp = 0; 507 } 508 } 509 else 510 c->n_any++; 511 if (RE_MODE(c->re + 1) == RE_ENDMARK) { 512 c->lastpos = c->str + c->n_any; 513 YIELD(1, c, 2); 514 } 515 else if (regmatch(c->re + 1, c->str + c->n_any, c->end_p, 516 c->firstp, &c->lastpos) == 1) { 517 YIELD(1, c, 3); 518 } 519 } 520 return 0; 521 } 522 /* regexp other than pat*, pat+ and pat? 
*/ 523 switch (RE_MODE(c->re)) { 524 case RE_BEGIN: 525 if (!c->firstp) 526 return 0; 527 c->re++; 528 break; 529 case RE_END: 530 if (c->str >= c->end_p) { 531 c->lastpos = c->str; 532 c->re++; 533 YIELD(1, c, 4); 534 } 535 else { 536 c->lastpos = NULL; 537 return 0; 538 } 539 break; 540 case RE_SUBREGEX: 541 if (c->sub_ctx == NULL) { 542 c->sub_ctx = GC_malloc(sizeof(struct MatchingContext1)); 543 } 544 c->sub_regex = c->re->p.sub; 545 for (;;) { 546 c->sub_ctx->label = 0; 547 while (regmatch_iter(c->sub_ctx, c->sub_regex->re, 548 c->str, c->end_p, c->firstp)) { 549 if (c->sub_ctx->lastpos != c->str) 550 c->firstp = 0; 551 if (RE_MODE(c->re + 1) == RE_ENDMARK) { 552 c->lastpos = c->sub_ctx->lastpos; 553 YIELD(1, c, 5); 554 } 555 else if (regmatch(c->re + 1, c->sub_ctx->lastpos, c->end_p, 556 c->firstp, &c->lastpos) == 1) { 557 YIELD(1, c, 6); 558 } 559 } 560 if (c->sub_regex->alt_regex == NULL) 561 break; 562 c->sub_regex = c->sub_regex->alt_regex; 563 } 564 return 0; 565 default: 566 { 567 longchar k; 568 k = set_longchar(c->str); 569 c->str += get_mclen(c->str); 570 if (!regmatch1(c->re, &k)) 571 return 0; 572 } 573 c->re++; 574 c->firstp = 0; 575 } 576 if (c->str > c->end_p) { 577 return 0; 578 } 579 } 580 c->lastpos = c->str; 581 #ifdef REGEX_DEBUG 582 if (verbose) 583 printf("Succeed: %s %d\n", c->str, c->lastpos - c->str); 584 #endif 585 YIELD(1, c, 7); 586 return 0; 587 } 588 589 static int 590 regmatch(regexchar * re, char *str, char *end_p, int firstp, char **lastpos) 591 { 592 struct MatchingContext1 contx; 593 594 *lastpos = NULL; 595 596 contx.label = 0; 597 while (regmatch_iter(&contx, re, str, end_p, firstp)) { 598 #ifdef REGEX_DEBUG 599 char *p; 600 if (verbose) { 601 printf("regmatch: matched <"); 602 for (p = str; p < contx.lastpos; p++) 603 putchar(*p); 604 printf(">\n"); 605 } 606 #endif 607 if (*lastpos == NULL || *lastpos < contx.lastpos) 608 *lastpos = contx.lastpos; 609 } 610 if (*lastpos == NULL) 611 return 0; 612 return 1; 613 } 614 615 
616 static int 617 regmatch1(regexchar * re, longchar * c) 618 { 619 int ans; 620 621 #ifdef USE_M17N 622 if (c->type == RE_TYPE_SYMBOL) 623 return 0; 624 #endif 625 switch (RE_MODE(re)) { 626 case RE_ANY: 627 #ifdef REGEX_DEBUG 628 if (verbose) 629 printf("%s vs any. -> 1\n", lc2c(c, 1)); 630 #endif /* REGEX_DEBUG */ 631 return 1; 632 case RE_NORMAL: 633 ans = match_longchar(re->p.pattern, c, re->mode & RE_IGNCASE); 634 #ifdef REGEX_DEBUG 635 if (verbose) 636 printf("RE=%s vs %s -> %d\n", lc2c(re->p.pattern, 1), lc2c(c, 1), 637 ans); 638 #endif /* REGEX_DEBUG */ 639 return ans; 640 case RE_WHICH: 641 return matchWhich(re->p.pattern, c, re->mode & RE_IGNCASE); 642 case RE_EXCEPT: 643 return !matchWhich(re->p.pattern, c, re->mode & RE_IGNCASE); 644 } 645 return 0; 646 } 647 648 static int 649 matchWhich(longchar * pattern, longchar * c, int igncase) 650 { 651 longchar *p = pattern; 652 int ans = 0; 653 654 #ifdef REGEX_DEBUG 655 if (verbose) 656 printf("RE pattern = %s char=%s", lc2c(pattern, 10000), lc2c(c, 1)); 657 #endif /* REGEX_DEBUG */ 658 while (p->type != RE_TYPE_END) { 659 if ((p + 1)->type == RE_WHICH_RANGE && (p + 2)->type != RE_TYPE_END) { 660 if (match_range_longchar(p, p + 2, c, igncase)) { 661 ans = 1; 662 break; 663 } 664 p += 3; 665 } 666 else { 667 if (match_longchar(p, c, igncase)) { 668 ans = 1; 669 break; 670 } 671 p++; 672 } 673 } 674 #ifdef REGEX_DEBUG 675 if (verbose) 676 printf(" -> %d\n", ans); 677 #endif /* REGEX_DEBUG */ 678 return ans; 679 } 680 681 static int 682 match_longchar(longchar * a, longchar * b, int ignore) 683 { 684 #ifdef USE_M17N 685 if (a->type != b->type) 686 return 0; 687 if (a->type == RE_TYPE_WCHAR_T) { 688 #ifdef USE_UNICODE 689 if (ignore) { 690 wc_uint32 ua = wc_any_to_ucs(a->wch), ub = wc_any_to_ucs(b->wch); 691 return (ua == ub || 692 ua == wc_ucs_tolower(ub) || 693 ua == wc_ucs_toupper(ub) || 694 ua == wc_ucs_totitle(ub)); 695 } 696 #endif 697 return (a->wch.ccs == b->wch.ccs) && (a->wch.code == b->wch.code); 698 
} 699 #endif 700 if (ignore && IS_ALPHA(b->ch)) 701 return (a->ch == TOLOWER(b->ch) || a->ch == TOUPPER(b->ch)); 702 else 703 return a->ch == b->ch; 704 } 705 706 static int 707 match_range_longchar(longchar * a, longchar * b, longchar * c, int ignore) 708 { 709 #ifdef USE_M17N 710 if (a->type != b->type || a->type != c->type) 711 return 0; 712 if (a->type == RE_TYPE_WCHAR_T) { 713 if (a->wch.ccs != c->wch.ccs || c->wch.ccs != b->wch.ccs) 714 return 0; 715 #ifdef USE_UNICODE 716 if (ignore) { 717 wc_uint32 uc = wc_any_to_ucs(c->wch); 718 719 if (wc_is_ucs_alpha(uc)) { 720 wc_uint32 ua = wc_any_to_ucs(a->wch); 721 wc_uint32 ub = wc_any_to_ucs(b->wch); 722 wc_uint32 upper = wc_ucs_toupper(uc); 723 wc_uint32 lower = wc_ucs_tolower(uc); 724 wc_uint32 title = wc_ucs_totitle(uc); 725 726 return ((ua <= upper && upper <= ub) || 727 (ua <= lower && lower <= ub) || 728 (ua <= title && title <= ub)); 729 } 730 } 731 #endif 732 return (a->wch.code <= c->wch.code && c->wch.code <= b->wch.code); 733 } 734 #endif 735 if (ignore && IS_ALPHA(c->ch)) 736 return ((a->ch <= TOLOWER(c->ch) && TOLOWER(c->ch) <= b->ch) || 737 (a->ch <= TOUPPER(c->ch) && TOUPPER(c->ch) <= b->ch)); 738 else 739 return (a->ch <= c->ch && c->ch <= b->ch); 740 } 741 742 #ifdef REGEX_DEBUG 743 char * 744 lc2c(longchar * x, int len) 745 { 746 static char y[100]; 747 int i = 0, j = 0; 748 char *r; 749 750 while (x[j].type != RE_TYPE_END && j < len) { 751 if (x[j].type == RE_WHICH_RANGE) 752 y[i++] = '-'; 753 #ifdef USE_M17N 754 else if (x[j].type == RE_TYPE_WCHAR_T) { 755 char buf[20]; 756 sprintf(buf, "[%x-%x]", x[j].wch.ccs, x[j].wch.code); 757 strcpy(&y[i], buf); 758 i += strlen(buf); 759 } 760 #endif 761 else 762 y[i++] = x[j].ch; 763 j++; 764 } 765 y[i] = '\0'; 766 r = GC_malloc_atomic(i + 1); 767 strcpy(r, y); 768 return r; 769 } 770 771 void 772 debugre(regexchar * re) 773 { 774 for (; RE_MODE(re) != RE_ENDMARK; re++) { 775 switch (RE_MODE(re)) { 776 case RE_BEGIN: 777 printf("Begin "); 778 continue; 779 
case RE_END: 780 printf("End "); 781 continue; 782 } 783 if (re->mode & RE_ANYTIME) 784 printf("Anytime-"); 785 if (re->mode & RE_OPT) 786 printf("Opt-"); 787 788 switch (RE_MODE(re)) { 789 case RE_ANY: 790 printf("Any "); 791 break; 792 case RE_NORMAL: 793 printf("Match-to'%c' ", *re->p.pattern); 794 break; 795 case RE_WHICH: 796 printf("One-of\"%s\" ", lc2c(re->p.pattern, 10000)); 797 break; 798 case RE_EXCEPT: 799 printf("Other-than\"%s\" ", lc2c(re->p.pattern, 10000)); 800 break; 801 case RE_SUBREGEX: 802 { 803 Regex *r = re->p.sub; 804 printf("("); 805 while (r) { 806 debugre(r->re); 807 if (r->alt_regex) 808 printf(" | "); 809 r = r->alt_regex; 810 } 811 printf(")"); 812 break; 813 } 814 default: 815 printf("Unknown "); 816 } 817 } 818 } 819 820 #endif /* REGEX_DEBUG */ 821 822 #ifdef REGEXTEST 823 int 824 main(int argc, char **argv) 825 { 826 char buf[128], buf2[128]; 827 char *msg; 828 Regex *re; 829 char *fpos, *epos; 830 FILE *f = stdin; 831 int i = 1; 832 833 #ifdef USE_M17N 834 wtf_init(WC_CES_EUC_JP, WC_CES_EUC_JP); 835 #endif 836 #ifdef REGEX_DEBUG 837 for (i = 1; i < argc; i++) { 838 if (strcmp(argv[i], "-v") == 0) 839 verbose = 1; 840 else 841 break; 842 } 843 #endif 844 845 if (argc > i) 846 f = fopen(argv[i], "r"); 847 if (f == NULL) { 848 fprintf(stderr, "Can't open %s\n", argv[i]); 849 exit(1); 850 } 851 while (fscanf(f, "%s%s", buf, buf2) == 2) { 852 re = newRegex(buf, 0, NULL, &msg); 853 if (re == NULL) { 854 printf("Error on regexp /%s/: %s\n", buf, msg); 855 exit(1); 856 } 857 if (RegexMatch(re, buf2, -1, 1)) { 858 printf("/%s/\t\"%s\"\t\"", buf, buf2); 859 MatchedPosition(re, &fpos, &epos); 860 while (fpos < epos) 861 putchar(*(fpos++)); 862 putchar('"'); 863 } 864 else 865 printf("/%s/\t\"%s\"\tno_match", buf, buf2); 866 putchar('\n'); 867 } 868 /* notreatched */ 869 return 0; 870 } 871 #endif