00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #if defined(_WIN32)
00041 #pragma warning(disable: 4018 4101)
00042 #endif
00043
00044
00045 #if defined (_AIX) && !defined (REGEX_MALLOC)
00046 #pragma alloca
00047 #endif
00048
00049 #undef _GNU_SOURCE
00050 #define _GNU_SOURCE
00051
00052 #if defined(LINUX)
00053 #define STDC_HEADERS
00054 #endif
00055
00056 #if defined(STDC_HEADERS) && !defined(emacs)
00057 #include <stddef.h>
00058 #else
00059
00060 #include <sys/types.h>
00061 #endif
00062
00063
00064
00065 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
00066 # include <wctype.h>
00067 # include <wchar.h>
00068 #endif
00069
00070
00071 #undef HAVE_LIBINTL_H
00072 #if HAVE_LIBINTL_H || defined (_LIBC)
00073 # include <libintl.h>
00074 #else
00075 # define gettext(msgid) (msgid)
00076 #endif
00077
00078 #ifndef gettext_noop
00079
00080
00081 #define gettext_noop(String) String
00082 #endif
00083
00084
00085
00086 #ifdef emacs
00087
00088 #include "lisp.h"
00089 #include "buffer.h"
00090 #include "syntax.h"
00091
00092 #else
00093
00094
00095
00096
00097 #undef REL_ALLOC
00098
00099 #if defined (STDC_HEADERS) || defined (_LIBC) || defined(_WIN32)
00100 #include <stdlib.h>
00101 #else
00102 char *malloc ();
00103 char *realloc ();
00104 void free();
00105 #endif
00106
00107
00108
00109 #ifdef INHIBIT_STRING_HEADER
00110 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
00111 #if !defined (bzero) && !defined (bcopy)
00112 #undef INHIBIT_STRING_HEADER
00113 #endif
00114 #endif
00115 #endif
00116
00117 #include <string.h>
00118
00119
00120
00121
00122 #ifndef INHIBIT_STRING_HEADER
00123 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) || defined (_WIN32)
00124 #ifndef bcmp
00125 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00126 #endif
00127 #ifndef bcopy
00128 #define bcopy(s, d, n) memcpy ((d), (s), (n))
00129 #endif
00130 #ifndef bzero
00131 #define bzero(s, n) memset ((s), 0, (n))
00132 #endif
00133 #else
00134 #include <strings.h>
00135 #endif
00136 #endif
00137
00138
00139
00140
00141
/* Syntax class stored in the syntax table for word-constituent
   characters.  A definition made before this point takes precedence.  */
#ifndef Sword
#define Sword 1
#endif

/* Operand wrapper for `switch' on an enum value: casts to int only when
   SWITCH_ENUM_BUG is defined.  */
#ifdef SWITCH_ENUM_BUG
#define SWITCH_ENUM_CAST(x) ((int)(x))
#else
#define SWITCH_ENUM_CAST(x) (x)
#endif

#ifdef SYNTAX_TABLE

/* The syntax table is provided by another translation unit.  */
extern char *re_syntax_table;

#else

/* Number of entries in the private syntax table.  */
#define CHAR_SET_SIZE 256

/* Private syntax table, indexed by character code.  */
static char re_syntax_table[CHAR_SET_SIZE];

/* Fill re_syntax_table on the first call: every entry is cleared, then
   'a'..'z', 'A'..'Z', '0'..'9' and '_' are marked Sword.  Later calls
   return immediately and leave the table untouched.  */
static void
init_syntax_once (void)
{
  static int done = 0;
  int c;

  if (done)
    return;

  /* memset comes from <string.h>, which this file includes
     unconditionally, so no bzero macro or <strings.h> is required.  */
  memset (re_syntax_table, 0, sizeof re_syntax_table);

  for (c = 'a'; c <= 'z'; c++)
    re_syntax_table[c] = Sword;

  for (c = 'A'; c <= 'Z'; c++)
    re_syntax_table[c] = Sword;

  for (c = '0'; c <= '9'; c++)
    re_syntax_table[c] = Sword;

  re_syntax_table[(unsigned char) '_'] = Sword;

  done = 1;
}

#endif

/* Syntax class of character code C (C is used directly as the index).  */
#define SYNTAX(c) re_syntax_table[c]
00191
00192 #endif
00193
00194
00195
00196 #include "regex_win32.h"
00197
00198
00199 #include <ctype.h>
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
/* ISASCII(c) guards every <ctype.h> wrapper below.  It is the constant 1
   when STDC_HEADERS is defined, or when isascii is neither a macro nor
   announced by HAVE_ISASCII; otherwise it calls isascii.  */
00212 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
00213 #define ISASCII(c) 1
00214 #else
00215 #define ISASCII(c) isascii(c)
00216 #endif
00217
/* ISBLANK and ISGRAPH use the library classifier only when it exists as a
   macro; otherwise ISBLANK accepts exactly space and tab, and ISGRAPH is
   built from isprint and isspace.  */
00218 #ifdef isblank
00219 #define ISBLANK(c) (ISASCII (c) && isblank (c))
00220 #else
00221 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00222 #endif
00223 #ifdef isgraph
00224 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00225 #else
00226 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00227 #endif
00228
/* Remaining classifiers: the matching <ctype.h> test, applied only when
   ISASCII(c) is true.  Each macro may evaluate C more than once.
   NOTE(review): C is handed to the <ctype.h> functions unchanged; callers
   presumably pass unsigned char values -- confirm at the call sites.  */
00229 #define ISPRINT(c) (ISASCII (c) && isprint (c))
00230 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00231 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
00232 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
00233 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00234 #define ISLOWER(c) (ISASCII (c) && islower (c))
00235 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00236 #define ISSPACE(c) (ISASCII (c) && isspace (c))
00237 #define ISUPPER(c) (ISASCII (c) && isupper (c))
00238 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00239
/* Fallback NULL for environments whose headers did not define it.  */
00240 #ifndef NULL
00241 #define NULL (void *)0
00242 #endif
00243
00244
00245
00246
00247
/* SIGN_EXTEND_CHAR(c): value of the low byte of C interpreted as signed.
   With __STDC__ this is a cast to signed char; otherwise the same result
   is computed arithmetically from the unsigned byte (xor 128, minus 128).  */
00248 #undef SIGN_EXTEND_CHAR
00249 #if __STDC__
00250 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00251 #else
00252
00253 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00254 #endif
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
/* Allocation strategy for scratch memory.

   REGEX_ALLOCATE (size)                   obtain SIZE bytes
   REGEX_REALLOCATE (source, osize, nsize) grow SOURCE from OSIZE to NSIZE
   REGEX_FREE (ptr)                        release PTR

   With REGEX_MALLOC or _WIN32 these map to malloc/realloc/free.
   Otherwise they are built on alloca, where "free" is a no-op and
   "realloc" copies into a fresh alloca block.  */
#if defined(REGEX_MALLOC) || defined(_WIN32)

#define REGEX_ALLOCATE malloc
#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#define REGEX_FREE free
/* _WIN32 alone selects this branch, so make sure later
   `#ifdef REGEX_MALLOC' tests agree.  The guard avoids redefining the
   macro when it was already supplied (e.g. -DREGEX_MALLOC, which gives
   it the value 1).  */
#ifndef REGEX_MALLOC
#define REGEX_MALLOC
#endif

#else /* alloca-based allocation */

/* Make alloca available unless it is already a macro.  */
#ifndef alloca
#ifdef __GNUC__
#define alloca __builtin_alloca
#else
#if HAVE_ALLOCA_H
#include <alloca.h>
#endif
#endif
#endif

#define REGEX_ALLOCATE alloca

/* Requires a `char *destination' variable in the expanding scope; the
   result of the expression is that variable.  */
#define REGEX_REALLOCATE(source, osize, nsize) \
  (destination = (char *) alloca (nsize), \
   bcopy (source, destination, osize), \
   destination)

/* alloca memory is not released explicitly.  */
#define REGEX_FREE(arg) ((void)0)

#endif /* alloca-based allocation */

/* Allocation of the failure stack itself:
   REGEX_ALLOCATE_STACK / REGEX_REALLOCATE_STACK / REGEX_FREE_STACK.  */
#if defined (REL_ALLOC) && defined (REGEX_MALLOC)

/* Relocating allocator; operates on a variable named
   `failure_stack_ptr' in the expanding scope.  */
#define REGEX_ALLOCATE_STACK(size) \
  r_alloc (&failure_stack_ptr, (size))
#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
  r_re_alloc (&failure_stack_ptr, (nsize))
#define REGEX_FREE_STACK(ptr) \
  r_alloc_free (&failure_stack_ptr)

#else

#ifdef REGEX_MALLOC

#define REGEX_ALLOCATE_STACK malloc
#define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
#define REGEX_FREE_STACK free

#else

#define REGEX_ALLOCATE_STACK alloca

#define REGEX_REALLOCATE_STACK(source, osize, nsize) \
  REGEX_REALLOCATE (source, osize, nsize)

/* No-op, spelled like REGEX_FREE so that both remain valid wherever an
   expression is expected.  */
#define REGEX_FREE_STACK(arg) ((void)0)

#endif
#endif
00338
00339
00340
00341
00342
/* True if PTR lies within the first of the two subject strings.  Uses
   `string1' and `size1' from the expanding scope; the one-past-the-end
   address counts as inside.  */
#define FIRST_STRING_P(ptr) \
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

/* Typed allocation helpers: N objects of type T.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* Resize ADDR if it is non-null, otherwise allocate it.  Wrapped in
   do/while (0) so that it behaves as a single statement; the bare
   if/else form broke when used inside a caller's `if ... else'.  */
#define RETALLOC_IF(addr, n, t) \
  do \
    { \
      if (addr) \
        RETALLOC ((addr), (n), t); \
      else \
        (addr) = TALLOC ((n), t); \
    } \
  while (0)
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

/* Number of bits per byte assumed by the bitmap code.  */
#define BYTEWIDTH 8

/* True if the two strings compare equal.  */
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

/* Note: each argument may be evaluated twice.  */
#undef MAX
#undef MIN
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

#define false 0
#define true 1
00365
/* Forward declaration of the matcher core; its definition is not part
   of this section of the file.
   NOTE(review): judging by the parameter names, string1/size1 and
   string2/size2 are the two halves of the subject text that
   FIRST_STRING_P also refers to -- confirm against the definition.  */
00366 static int
00367 re_match_2_internal(struct re_pattern_buffer *bufp,
00368 const char *string1,
00369 int size1,
00370 const char *string2,
00371 int size2,
00372 int pos,
00373 struct re_registers *regs,
00374 int stop);
00375
00376
00377
00378
00379
00380
/* Opcodes of the compiled pattern.  Enumerator values are assigned
   sequentially starting at zero, so the order below must not change.
   Operand notes describe what print_partial_compiled_pattern in this
   file decodes after each opcode; an opcode without a note is printed
   with no operands.  */
typedef enum
{
  no_op = 0,
  succeed,
  exactn,                       /* count byte, then that many bytes */
  anychar,
  charset,                      /* length byte N, then N bitmap bytes;
                                   bit (c % 8) of byte (c / 8) is for
                                   character code c */
  charset_not,                  /* same layout as charset */
  start_memory,                 /* two one-byte operands */
  stop_memory,                  /* two one-byte operands */
  duplicate,                    /* one one-byte operand */
  begline,
  endline,
  begbuf,
  endbuf,
  jump,                         /* two-byte relative offset */
  jump_past_alt,                /* two-byte relative offset */
  on_failure_jump,              /* two-byte relative offset */
  on_failure_keep_string_jump,  /* two-byte relative offset */
  pop_failure_jump,             /* two-byte relative offset */
  maybe_pop_jump,               /* two-byte relative offset */
  dummy_failure_jump,           /* two-byte relative offset */
  push_dummy_failure,
  succeed_n,                    /* two-byte offset, two-byte count */
  jump_n,                       /* two-byte offset, two-byte count */
  set_number_at,                /* two-byte offset, two-byte value */
  wordchar,
  notwordchar,
  wordbeg,
  wordend,
  wordbound,
  notwordbound
#ifdef emacs
  , before_dot,
  at_dot,
  after_dot,
  syntaxspec,                   /* one one-byte operand */
  notsyntaxspec                 /* one one-byte operand */
#endif
} re_opcode_t;
00514
00515
00516
00517
00518
/* Store the low 16 bits of NUMBER at DESTINATION, least significant
   byte first.  The high byte is taken from the unsigned conversion of
   NUMBER so that no negative value is right-shifted (which would be
   implementation-defined).  NUMBER is evaluated twice.  */
#define STORE_NUMBER(destination, number) \
  do { \
    (destination)[0] = (number) & 0377; \
    (destination)[1] = ((unsigned) (number) >> 8) & 0377; \
  } while (0)

/* As STORE_NUMBER, then advance DESTINATION past the two bytes.  */
#define STORE_NUMBER_AND_INCR(destination, number) \
  do { \
    STORE_NUMBER (destination, number); \
    (destination) += 2; \
  } while (0)

/* Read the two-byte number at SOURCE into DESTINATION.  The second
   byte is sign-extended; it is scaled by multiplication because
   left-shifting a negative value is undefined behaviour.  */
#define EXTRACT_NUMBER(destination, source) \
  do { \
    (destination) = *(source) & 0377; \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256; \
  } while (0)

#ifdef DEBUG
/* Function form of EXTRACT_NUMBER: store the number read from the two
   bytes at SOURCE into *DEST.  */
static void
extract_number (int *dest, unsigned char *source)
{
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  *dest += temp * 256;
}

/* In DEBUG builds the macro forwards to the function unless
   EXTRACT_MACROS is defined.  */
#ifndef EXTRACT_MACROS
#undef EXTRACT_NUMBER
#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
#endif

#endif /* DEBUG */

/* As EXTRACT_NUMBER, then advance SOURCE past the two bytes.  */
#define EXTRACT_NUMBER_AND_INCR(destination, source) \
  do { \
    EXTRACT_NUMBER (destination, source); \
    (source) += 2; \
  } while (0)

#ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR: read the number at *SOURCE
   into *DESTINATION and advance *SOURCE by two bytes.  */
static void
extract_number_and_incr (int *destination, unsigned char **source)
{
  extract_number (destination, *source);
  *source += 2;
}

#ifndef EXTRACT_MACROS
#undef EXTRACT_NUMBER_AND_INCR
#define EXTRACT_NUMBER_AND_INCR(dest, src) \
  extract_number_and_incr (&dest, &src)
#endif

#endif /* DEBUG */
00591
00592
00593
00594
00595
00596
00597
00598 #ifdef DEBUG
00599
00600
00601 #include <stdio.h>
00602
00603
00604 #include <assert.h>
00605
/* Run-time switch for the trace output produced by the macros below.  */
static int debug = 0;

/* DEBUG build: DEBUG_STATEMENT keeps its argument, and the DEBUG_PRINT*
   macros print only while `debug' is non-zero.  Each printing macro is
   wrapped in do/while (0) so that it is a single statement and cannot
   capture an `else' that follows it in the caller.  */
#define DEBUG_STATEMENT(e) e
#define DEBUG_PRINT1(x) \
  do { if (debug) printf (x); } while (0)
#define DEBUG_PRINT2(x1, x2) \
  do { if (debug) printf (x1, x2); } while (0)
#define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
#define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
/* The first argument P is accepted but not used.  */
#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)
#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
00617
00618
00619
00620
00621 void
00622 print_fastmap (fastmap)
00623 char *fastmap;
00624 {
00625 unsigned was_a_range = 0;
00626 unsigned i = 0;
00627
00628 while (i < (1 << BYTEWIDTH))
00629 {
00630 if (fastmap[i++])
00631 {
00632 was_a_range = 0;
00633 putchar (i - 1);
00634 while (i < (1 << BYTEWIDTH) && fastmap[i])
00635 {
00636 was_a_range = 1;
00637 i++;
00638 }
00639 if (was_a_range)
00640 {
00641 printf ("-");
00642 putchar (i - 1);
00643 }
00644 }
00645 }
00646 putchar ('\n');
00647 }
00648
00649
00650
00651
00652
00653 void
00654 print_partial_compiled_pattern (start, end)
00655 unsigned char *start;
00656 unsigned char *end;
00657 {
00658 int mcnt, mcnt2;
00659 unsigned char *p1;
00660 unsigned char *p = start;
00661 unsigned char *pend = end;
00662
00663 if (start == NULL)
00664 {
00665 printf ("(null)\n");
00666 return;
00667 }
00668
00669
00670 while (p < pend)
00671 {
00672 printf ("%d:\t", p - start);
00673
00674 switch ((re_opcode_t) *p++)
00675 {
00676 case no_op:
00677 printf ("/no_op");
00678 break;
00679
00680 case exactn:
00681 mcnt = *p++;
00682 printf ("/exactn/%d", mcnt);
00683 do
00684 {
00685 putchar ('/');
00686 putchar (*p++);
00687 }
00688 while (--mcnt);
00689 break;
00690
00691 case start_memory:
00692 mcnt = *p++;
00693 printf ("/start_memory/%d/%d", mcnt, *p++);
00694 break;
00695
00696 case stop_memory:
00697 mcnt = *p++;
00698 printf ("/stop_memory/%d/%d", mcnt, *p++);
00699 break;
00700
00701 case duplicate:
00702 printf ("/duplicate/%d", *p++);
00703 break;
00704
00705 case anychar:
00706 printf ("/anychar");
00707 break;
00708
00709 case charset:
00710 case charset_not:
00711 {
00712 register int c, last = -100;
00713 register int in_range = 0;
00714
00715 printf ("/charset [%s",
00716 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00717
00718 assert (p + *p < pend);
00719
00720 for (c = 0; c < 256; c++)
00721 if (c / 8 < *p
00722 && (p[1 + (c/8)] & (1 << (c % 8))))
00723 {
00724
00725 if (last + 1 == c && ! in_range)
00726 {
00727 putchar ('-');
00728 in_range = 1;
00729 }
00730
00731 else if (last + 1 != c && in_range)
00732 {
00733 putchar (last);
00734 in_range = 0;
00735 }
00736
00737 if (! in_range)
00738 putchar (c);
00739
00740 last = c;
00741 }
00742
00743 if (in_range)
00744 putchar (last);
00745
00746 putchar (']');
00747
00748 p += 1 + *p;
00749 }
00750 break;
00751
00752 case begline:
00753 printf ("/begline");
00754 break;
00755
00756 case endline:
00757 printf ("/endline");
00758 break;
00759
00760 case on_failure_jump:
00761 extract_number_and_incr (&mcnt, &p);
00762 printf ("/on_failure_jump to %d", p + mcnt - start);
00763 break;
00764
00765 case on_failure_keep_string_jump:
00766 extract_number_and_incr (&mcnt, &p);
00767 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00768 break;
00769
00770 case dummy_failure_jump:
00771 extract_number_and_incr (&mcnt, &p);
00772 printf ("/dummy_failure_jump to %d", p + mcnt - start);
00773 break;
00774
00775 case push_dummy_failure:
00776 printf ("/push_dummy_failure");
00777 break;
00778
00779 case maybe_pop_jump:
00780 extract_number_and_incr (&mcnt, &p);
00781 printf ("/maybe_pop_jump to %d", p + mcnt - start);
00782 break;
00783
00784 case pop_failure_jump:
00785 extract_number_and_incr (&mcnt, &p);
00786 printf ("/pop_failure_jump to %d", p + mcnt - start);
00787 break;
00788
00789 case jump_past_alt:
00790 extract_number_and_incr (&mcnt, &p);
00791 printf ("/jump_past_alt to %d", p + mcnt - start);
00792 break;
00793
00794 case jump:
00795 extract_number_and_incr (&mcnt, &p);
00796 printf ("/jump to %d", p + mcnt - start);
00797 break;
00798
00799 case succeed_n:
00800 extract_number_and_incr (&mcnt, &p);
00801 p1 = p + mcnt;
00802 extract_number_and_incr (&mcnt2, &p);
00803 printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
00804 break;
00805
00806 case jump_n:
00807 extract_number_and_incr (&mcnt, &p);
00808 p1 = p + mcnt;
00809 extract_number_and_incr (&mcnt2, &p);
00810 printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
00811 break;
00812
00813 case set_number_at:
00814 extract_number_and_incr (&mcnt, &p);
00815 p1 = p + mcnt;
00816 extract_number_and_incr (&mcnt2, &p);
00817 printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
00818 break;
00819
00820 case wordbound:
00821 printf ("/wordbound");
00822 break;
00823
00824 case notwordbound:
00825 printf ("/notwordbound");
00826 break;
00827
00828 case wordbeg:
00829 printf ("/wordbeg");
00830 break;
00831
00832 case wordend:
00833 printf ("/wordend");
00834
00835 #ifdef emacs
00836 case before_dot:
00837 printf ("/before_dot");
00838 break;
00839
00840 case at_dot:
00841 printf ("/at_dot");
00842 break;
00843
00844 case after_dot:
00845 printf ("/after_dot");
00846 break;
00847
00848 case syntaxspec:
00849 printf ("/syntaxspec");
00850 mcnt = *p++;
00851 printf ("/%d", mcnt);
00852 break;
00853
00854 case notsyntaxspec:
00855 printf ("/notsyntaxspec");
00856 mcnt = *p++;
00857 printf ("/%d", mcnt);
00858 break;
00859 #endif
00860
00861 case wordchar:
00862 printf ("/wordchar");
00863 break;
00864
00865 case notwordchar:
00866 printf ("/notwordchar");
00867 break;
00868
00869 case begbuf:
00870 printf ("/begbuf");
00871 break;
00872
00873 case endbuf:
00874 printf ("/endbuf");
00875 break;
00876
00877 default:
00878 printf ("?%d", *(p-1));
00879 }
00880
00881 putchar ('\n');
00882 }
00883
00884 printf ("%d:\tend of pattern.\n", p - start);
00885 }
00886
00887
00888 void
00889 print_compiled_pattern (bufp)
00890 struct re_pattern_buffer *bufp;
00891 {
00892 unsigned char *buffer = bufp->buffer;
00893
00894 print_partial_compiled_pattern (buffer, buffer + bufp->used);
00895 printf ("%ld bytes used/%ld bytes allocated.\n",
00896 bufp->used, bufp->allocated);
00897
00898 if (bufp->fastmap_accurate && bufp->fastmap)
00899 {
00900 printf ("fastmap: ");
00901 print_fastmap (bufp->fastmap);
00902 }
00903
00904 printf ("re_nsub: %d\t", bufp->re_nsub);
00905 printf ("regs_alloc: %d\t", bufp->regs_allocated);
00906 printf ("can_be_null: %d\t", bufp->can_be_null);
00907 printf ("newline_anchor: %d\n", bufp->newline_anchor);
00908 printf ("no_sub: %d\t", bufp->no_sub);
00909 printf ("not_bol: %d\t", bufp->not_bol);
00910 printf ("not_eol: %d\t", bufp->not_eol);
00911 printf ("syntax: %lx\n", bufp->syntax);
00912
00913 }
00914
00915
/* Print to stdout the text from WHERE to the end of the two-part
   subject STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes).
   If WHERE lies in STRING1 (per FIRST_STRING_P), the rest of STRING1 is
   printed and then all of STRING2; otherwise WHERE is taken to point
   into STRING2.  Prints "(null)" when WHERE is NULL.  */
void
print_double_string (const char *where, const char *string1, int size1,
                     const char *string2, int size2)
{
  int this_char;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      for (this_char = (int) (where - string1); this_char < size1;
           this_char++)
        putchar (string1[this_char]);

      /* Continue from the start of the second string.  */
      where = string2;
    }

  for (this_char = (int) (where - string2); this_char < size2; this_char++)
    putchar (string2[this_char]);
}
00942
/* Write the character C to stderr.  */
void
printchar (int c)
{
  putc (c, stderr);
}
00949
00950 #else
00951
/* Non-DEBUG build: assert and every tracing macro expand to nothing, so
   their arguments are never evaluated.  */
00952 #undef assert
00953 #define assert(e)
00954
00955 #define DEBUG_STATEMENT(e)
00956 #define DEBUG_PRINT1(x)
00957 #define DEBUG_PRINT2(x1, x2)
00958 #define DEBUG_PRINT3(x1, x2, x3)
00959 #define DEBUG_PRINT4(x1, x2, x3, x4)
00960 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00961 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00962
00963 #endif
00964
00965
00966
00967
00968
00969
00970 reg_syntax_t re_syntax_options;
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980 reg_syntax_t
00981 re_set_syntax(reg_syntax_t syntax)
00982 {
00983 reg_syntax_t ret = re_syntax_options;
00984
00985 re_syntax_options = syntax;
00986 #ifdef DEBUG
00987 if (syntax & RE_DEBUG)
00988 debug = 1;
00989 else if (debug)
00990 debug = 0;
00991 #endif
00992 return ret;
00993 }
00994
00995
00996
00997
00998
00999
/* Message strings for the regex error conditions.  Each entry is
   wrapped in gettext_noop, which this file defines as the identity
   macro unless it was defined earlier.
   NOTE(review): the entries are presumably indexed by the reg_errcode_t
   value, so their order would have to match that enum -- confirm
   against the header that declares it.  */
01000 static const char *re_error_msgid[] =
01001 {
01002 gettext_noop ("Success"),
01003 gettext_noop ("No match"),
01004 gettext_noop ("Invalid regular expression"),
01005 gettext_noop ("Invalid collation character"),
01006 gettext_noop ("Invalid character class name"),
01007 gettext_noop ("Trailing backslash"),
01008 gettext_noop ("Invalid back reference"),
01009 gettext_noop ("Unmatched [ or [^"),
01010 gettext_noop ("Unmatched ( or \\("),
01011 gettext_noop ("Unmatched \\{"),
01012 gettext_noop ("Invalid content of \\{\\}"),
01013 gettext_noop ("Invalid range end"),
01014 gettext_noop ("Memory exhausted"),
01015 gettext_noop ("Invalid preceding regular expression"),
01016 gettext_noop ("Premature end of regular expression"),
01017 gettext_noop ("Regular expression too big"),
01018 gettext_noop ("Unmatched ) or \\)"),
01019 };
01020
01021
01022
01023
01024
01025
01026
01027
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
/* MATCH_MAY_ALLOCATE stays defined unless this is an emacs build that
   uses C_ALLOCA or REGEX_MALLOC.  C_ALLOCA is ignored under GCC.  */
#define MATCH_MAY_ALLOCATE

#ifdef __GNUC__
#undef C_ALLOCA
#endif

#if defined (emacs) && (defined (C_ALLOCA) || defined (REGEX_MALLOC))
#undef MATCH_MAY_ALLOCATE
#endif

/* Number of entries the failure stack starts with.  */
#ifndef INIT_FAILURE_ALLOC
#define INIT_FAILURE_ALLOC 5
#endif

/* Integer types used by the failure stack, and the failure limit that
   applies when the matcher may allocate.  Builds with INT_IS_16BIT use
   long types and a smaller limit.  */
#ifdef INT_IS_16BIT
typedef long int re_fail_int_t;
typedef unsigned long int re_fail_count_t;
#define RE_MAX_FAILURES_ALLOCATING 4000
#else
typedef int re_fail_int_t;
typedef unsigned re_fail_count_t;
#define RE_MAX_FAILURES_ALLOCATING 20000
#endif

/* Limit used by DOUBLE_FAIL_STACK when deciding whether the failure
   stack may still grow.  */
#if defined (MATCH_MAY_ALLOCATE)
static re_fail_int_t re_max_failures = RE_MAX_FAILURES_ALLOCATING;
#else
static re_fail_int_t re_max_failures = 2000;
#endif

/* One failure-stack slot: either a pointer or an integer.  */
union fail_stack_elt
{
  unsigned char *pointer;
  re_fail_int_t integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

/* The failure stack: `stack' holds `size' slots, of which the first
   `avail' are in use.  */
typedef struct
{
  fail_stack_elt_t *stack;
  re_fail_count_t size;
  re_fail_count_t avail;
} fail_stack_type;
01127
/* Predicates on the failure stack.  The first and third operate on a
   variable named `fail_stack' in the expanding scope, the second on a
   pointer named `fail_stack_ptr'.  */
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)

/* INIT_FAIL_STACK prepares `fail_stack' for use; RESET_FAIL_STACK
   releases it.  When the matcher may allocate, the initial stack holds
   INIT_FAILURE_ALLOC slots and an allocation failure makes the
   ENCLOSING FUNCTION return -2.  */
#ifdef MATCH_MAY_ALLOCATE
#define INIT_FAIL_STACK() \
  do { \
    fail_stack.stack = (fail_stack_elt_t *) \
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
 \
    if (fail_stack.stack == NULL) \
      return -2; \
 \
    fail_stack.size = INIT_FAILURE_ALLOC; \
    fail_stack.avail = 0; \
  } while (0)

#define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
#else
#define INIT_FAIL_STACK() \
  do { \
    fail_stack.avail = 0; \
  } while (0)

#define RESET_FAIL_STACK()
#endif

/* Double the capacity of FAIL_STACK.  Evaluates to 1 on success and to
   0 if the stack already exceeds re_max_failures * MAX_FAILURE_ITEMS
   slots or the reallocation yields NULL.
   NOTE(review): when REGEX_REALLOCATE_STACK is realloc, a failed call
   leaves (fail_stack).stack NULL and the previous block is not freed
   here.  */
#define DOUBLE_FAIL_STACK(fail_stack) \
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
   ? 0 \
   : ((fail_stack).stack = (fail_stack_elt_t *) \
        REGEX_REALLOCATE_STACK ((fail_stack).stack, \
          (fail_stack).size * sizeof (fail_stack_elt_t), \
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
 \
      (fail_stack).stack == NULL \
      ? 0 \
      : ((fail_stack).size <<= 1, \
         1)))

/* Push POINTER onto FAIL_STACK, growing the stack first if it is full.
   Evaluates to 1 on success and 0 if the stack could not grow.
   Fullness is tested on FAIL_STACK itself, so the macro also works for
   a stack variable that is not named `fail_stack'.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
  (((FAIL_STACK).avail == (FAIL_STACK).size \
    && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
   ? 0 \
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
      1))

/* Unchecked pushes onto `fail_stack': the caller must already have
   made room.  */
#define PUSH_FAILURE_POINTER(item) \
  (fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item))

#define PUSH_FAILURE_INT(item) \
  (fail_stack.stack[fail_stack.avail++].integer = (item))

#define PUSH_FAILURE_ELT(item) \
  (fail_stack.stack[fail_stack.avail++] = (item))

/* Unchecked pops from `fail_stack', mirroring the pushes above.  */
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]

/* In DEBUG builds an extra integer travels with every failure point;
   otherwise these expand to nothing.  */
#ifdef DEBUG
#define DEBUG_PUSH PUSH_FAILURE_INT
#define DEBUG_POP(item_addr) ((item_addr)->integer = POP_FAILURE_INT ())
#else
#define DEBUG_PUSH(item)
#define DEBUG_POP(item_addr)
#endif
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
/* Push a failure point onto `fail_stack'.

   Pushed, in order: for every register from lowest_active_reg to
   highest_active_reg its start pointer, end pointer and info word; then
   lowest_active_reg, highest_active_reg, PATTERN_PLACE, STRING_PLACE,
   and (DEBUG builds only) the failure id.  POP_FAILURE_POINT pops them
   in the reverse order.

   If the stack cannot be grown to hold these items, the ENCLOSING
   FUNCTION returns FAILURE_CODE.

   Every line of the body must end in a line continuation, including
   the comment lines.  */
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
  do { \
    /* Scratch required by the alloca flavour of REGEX_REALLOCATE, */ \
    /* which DOUBLE_FAIL_STACK may expand to.  */ \
    char *destination; \
    s_reg_t this_reg; \
 \
    DEBUG_STATEMENT (failure_id++); \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size); \
 \
    DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
 \
    /* Grow the stack until everything pushed below fits.  */ \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
      { \
        if (!DOUBLE_FAIL_STACK (fail_stack)) \
          return failure_code; \
 \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                      (fail_stack).size); \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS); \
      } \
 \
    DEBUG_PRINT1 ("\n"); \
 \
    /* Save every active register.  */ \
    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
         this_reg++) \
      { \
        DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
        DEBUG_STATEMENT (num_regs_pushed++); \
 \
        DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
        PUSH_FAILURE_POINTER (regstart[this_reg]); \
 \
        DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
        PUSH_FAILURE_POINTER (regend[this_reg]); \
 \
        DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
        DEBUG_PRINT2 (" match_null=%d", \
                      REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
        DEBUG_PRINT2 (" matched_something=%d", \
                      MATCHED_SOMETHING (reg_info[this_reg])); \
        DEBUG_PRINT2 (" ever_matched=%d", \
                      EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
        DEBUG_PRINT1 ("\n"); \
        PUSH_FAILURE_ELT (reg_info[this_reg].word); \
      } \
 \
    DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \
    PUSH_FAILURE_INT (lowest_active_reg); \
 \
    DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg); \
    PUSH_FAILURE_INT (highest_active_reg); \
 \
    DEBUG_PRINT2 (" Pushing pattern 0x%x:\n", pattern_place); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
    PUSH_FAILURE_POINTER (pattern_place); \
 \
    DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                               size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_POINTER (string_place); \
 \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
    DEBUG_PUSH (failure_id); \
  } while (0)
01311
01312
01313
/* Slots pushed per saved register: start, end and info word.  */
#define NUM_REG_ITEMS 3

/* Slots pushed per failure point besides the registers; DEBUG builds
   push one more.  */
#ifndef DEBUG
#define NUM_NONREG_ITEMS 4
#else
#define NUM_NONREG_ITEMS 5
#endif

/* Multiplier applied to re_max_failures by DOUBLE_FAIL_STACK.  */
#define MAX_FAILURE_ITEMS (NUM_NONREG_ITEMS + 5 * NUM_REG_ITEMS)

/* Slots the next failure point needs, computed from
   `lowest_active_reg' and `highest_active_reg' in the expanding
   scope.  */
#define NUM_FAILURE_ITEMS \
  (((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS) \
   + NUM_NONREG_ITEMS)

/* Unused slots left in `fail_stack'.  */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
/* Pop the failure point on top of `fail_stack', undoing
   PUSH_FAILURE_POINT in reverse order.

   STR receives the saved string position unless that position is NULL,
   in which case STR is left unchanged.  PAT receives the saved pattern
   position, LOW_REG/HIGH_REG the saved active register range, and
   REGSTART/REGEND/REG_INFO are restored for every register in that
   range.  set_regs_matched_done is reset to 0.

   The expansion is a brace block rather than do/while (0).  Every line
   of the body must end in a line continuation, including the comment
   lines.  */
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info) \
{ \
  DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
  s_reg_t this_reg; \
  const unsigned char *string_temp; \
 \
  assert (!FAIL_STACK_EMPTY ()); \
 \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
 \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
 \
  DEBUG_POP (&failure_id); \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
 \
  /* A NULL saved string position means: keep the current one.  */ \
  string_temp = POP_FAILURE_POINTER (); \
  if (string_temp != NULL) \
    str = (const char *) string_temp; \
 \
  DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
  DEBUG_PRINT1 ("'\n"); \
 \
  pat = (unsigned char *) POP_FAILURE_POINTER (); \
  DEBUG_PRINT2 (" Popping pattern 0x%x:\n", pat); \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
 \
  high_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
 \
  low_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
 \
  /* The condition is the constant 1, so the else branch below is */ \
  /* never executed.  */ \
  if (1) \
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
      { \
        DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
 \
        reg_info[this_reg].word = POP_FAILURE_ELT (); \
        DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
 \
        regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
 \
        regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
      } \
  else \
    { \
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
        { \
          reg_info[this_reg].word.integer = 0; \
          regend[this_reg] = 0; \
          regstart[this_reg] = 0; \
        } \
      highest_active_reg = high_reg; \
    } \
 \
  set_regs_matched_done = 0; \
  DEBUG_STATEMENT (nfailure_points_popped++); \
}
01420
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430
01431
01432
01433
01434
01435
01436
/* Per-register bookkeeping.  The `word' member views the same storage
   as a failure-stack slot, which is how PUSH_FAILURE_POINT and
   POP_FAILURE_POINT save and restore it; the `bits' member holds the
   individual flags.  MATCH_NULL_UNSET_VALUE is defined next to the
   two-bit match_null_string_p field.
   NOTE(review): presumably it marks that field as "not yet computed"
   -- confirm at its uses.  */
01437 typedef union
01438 {
01439 fail_stack_elt_t word;
01440 struct
01441 {
01442
01443
01444 #define MATCH_NULL_UNSET_VALUE 3
01445 unsigned match_null_string_p : 2;
01446 unsigned is_active : 1;
01447 unsigned matched_something : 1;
01448 unsigned ever_matched_something : 1;
01449 } bits;
01450 } register_info_type;
01451
/* Accessors for the flag bits of a register_info_type R.  */
01452 #define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
01453 #define IS_ACTIVE(R) ((R).bits.is_active)
01454 #define MATCHED_SOMETHING(R) ((R).bits.matched_something)
01455 #define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
01456
01457
01458
01459
01460
/* Set both matched_something and ever_matched_something on every
   register from lowest_active_reg to highest_active_reg.  The work is
   skipped while set_regs_matched_done is non-zero; the macro sets that
   flag itself and POP_FAILURE_POINT clears it again.  Uses
   set_regs_matched_done, lowest_active_reg, highest_active_reg and
   reg_info from the expanding scope.  */
01461 #define SET_REGS_MATCHED() \
01462 do \
01463 { \
01464 if (!set_regs_matched_done) \
01465 { \
01466 active_reg_t r; \
01467 set_regs_matched_done = 1; \
01468 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
01469 { \
01470 MATCHED_SOMETHING (reg_info[r]) \
01471 = EVER_MATCHED_SOMETHING (reg_info[r]) \
01472 = 1; \
01473 } \
01474 } \
01475 } \
01476 while (0)
01477
01478
/* Sentinel for "this register holds no value": the address of a private
   dummy object, which cannot coincide with a position in a subject
   string.  (The address-of expression had been mangled into a single
   "registered trademark" character by HTML entity decoding.)  */
static char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01482
01483
01484
/* Forward declarations of the pattern-compiler helpers; none of their
   definitions are in this section of the file.  store_op1/store_op2 and
   insert_op1/insert_op2 are the functions the STORE_JUMP* and
   INSERT_JUMP* macros in this file expand to.
   NOTE(review): _RE_ARGS comes from a header not visible here;
   presumably it yields the parameter list on compilers with prototypes
   and an empty list otherwise -- confirm.  */
01485 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
01486 reg_syntax_t syntax,
01487 struct re_pattern_buffer *bufp));
01488 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
01489 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01490 int arg1, int arg2));
01491 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01492 int arg, unsigned char *end));
01493 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01494 int arg1, int arg2, unsigned char *end));
01495 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
01496 reg_syntax_t syntax));
01497 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
01498 reg_syntax_t syntax));
01499 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
01500 const char *pend,
01501 char *translate,
01502 reg_syntax_t syntax,
01503 unsigned char *b));
01504
01505
01506
01507
01508
/* Fetch the next pattern character into C and, when a `translate' table
   is installed, map it through that table.  If the pattern is exhausted
   (p == pend) the enclosing function returns REG_EEND.  Expects `p',
   `pend' and `translate' to be in scope.  */
#ifndef PATFETCH
#define PATFETCH(c)							\
  do {									\
    PATFETCH_RAW (c);							\
    if (translate)							\
      c = (unsigned char) translate[c];					\
  } while (0)
#endif

/* Like PATFETCH, but never consults the translation table.  */
#define PATFETCH_RAW(c)							\
  do {									\
    if (p == pend)							\
      return REG_EEND;							\
    c = (unsigned char) *p++;						\
  } while (0)

/* Step the pattern cursor back over the character just fetched.  */
#define PATUNFETCH p--

/* Value of D after translation: the table entry for D (indexed as an
   unsigned char, result converted to char) when `translate' is
   non-null, otherwise D itself.  */
#ifndef TRANSLATE
#define TRANSLATE(d)							\
  (translate ? (char) translate[(unsigned char) (d)] : (d))
#endif
01536
01537
01538
01539
01540
/* Number of bytes first allocated for a compiled pattern.  */
#define INIT_BUF_SIZE  32

/* Grow the pattern buffer (via EXTEND_BUFFER) until at least N more
   bytes fit after the write cursor `b'.  Note this expands to a bare
   `while' statement, so it must be used as a statement of its own.  */
#define GET_BUFFER_SPACE(n)						\
  while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
    EXTEND_BUFFER ()

/* Append one byte C to the pattern buffer, growing it if needed.  */
#define BUF_PUSH(c)							\
  do {									\
    GET_BUFFER_SPACE (1);						\
    *b++ = (unsigned char) (c);						\
  } while (0)

/* Append the two bytes C1, C2; space for both is reserved up front.  */
#define BUF_PUSH_2(c1, c2)						\
  do {									\
    GET_BUFFER_SPACE (2);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
  } while (0)

/* Append the three bytes C1, C2, C3; space is reserved up front.  */
#define BUF_PUSH_3(c1, c2, c3)						\
  do {									\
    GET_BUFFER_SPACE (3);						\
    *b++ = (unsigned char) (c1);					\
    *b++ = (unsigned char) (c2);					\
    *b++ = (unsigned char) (c3);					\
  } while (0)

/* The jump helpers below encode the displacement from the end of a
   three-byte jump instruction at LOC to the target TO.  */

/* Overwrite LOC with jump OP to TO.  */
#define STORE_JUMP(op, loc, to)						\
  store_op1 (op, loc, (int) ((to) - ((loc) + 3)))

/* As STORE_JUMP, with a second operand ARG stored after the offset.  */
#define STORE_JUMP2(op, loc, to, arg)					\
  store_op2 (op, loc, (int) ((to) - ((loc) + 3)), arg)

/* Open a gap at LOC (shifting everything up to `b') and store jump OP
   to TO there.  The caller advances `b' afterwards.  */
#define INSERT_JUMP(op, loc, to)					\
  insert_op1 (op, loc, (int) ((to) - ((loc) + 3)), b)

/* As INSERT_JUMP, with a second operand ARG.  */
#define INSERT_JUMP2(op, loc, to, arg)					\
  insert_op2 (op, loc, (int) ((to) - ((loc) + 3)), arg, b)
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
/* Upper limit on the size of a compiled pattern and the reallocation
   primitive used to grow it.  The first branch caps the size below
   64K and converts the byte count to size_t explicitly.  */
#if defined(_MSC_VER)  && !defined(_WIN32)
#define MAX_BUF_SIZE  65500L
#define REALLOC(p,s) realloc ((p), (size_t) (s))
#else
#define MAX_BUF_SIZE (1L << 16)
#define REALLOC(p,s) realloc ((p), (s))
#endif

/* Double the pattern buffer (never beyond MAX_BUF_SIZE) and rebase
   every pointer into it: `b', `begalt', and -- when non-null --
   `fixup_alt_jump', `laststart' and `pending_exact'.

   Makes the enclosing function return REG_ESIZE when the buffer is
   already at its maximum, or REG_ESPACE when memory is exhausted.

   The pointers are saved as offsets before reallocating, so the old
   (possibly freed) buffer address is never used afterwards.  On
   allocation failure bufp->buffer and bufp->allocated are left
   untouched: the original buffer stays owned by BUFP instead of being
   leaked, and the recorded size still matches the real allocation.  */
#define EXTEND_BUFFER()							\
  do {									\
    unsigned char *eb_new_buffer;					\
    unsigned long eb_new_size;						\
    long eb_b = b - bufp->buffer;					\
    long eb_begalt = begalt - bufp->buffer;				\
    long eb_fixup							\
      = fixup_alt_jump ? fixup_alt_jump - bufp->buffer : 0;		\
    long eb_laststart = laststart ? laststart - bufp->buffer : 0;	\
    long eb_pending							\
      = pending_exact ? pending_exact - bufp->buffer : 0;		\
    if (bufp->allocated == MAX_BUF_SIZE)				\
      return REG_ESIZE;							\
    eb_new_size = bufp->allocated << 1;					\
    if (eb_new_size > MAX_BUF_SIZE)					\
      eb_new_size = MAX_BUF_SIZE;					\
    eb_new_buffer							\
      = (unsigned char *) REALLOC (bufp->buffer, eb_new_size);		\
    if (eb_new_buffer == NULL)						\
      return REG_ESPACE;						\
    bufp->buffer = eb_new_buffer;					\
    bufp->allocated = eb_new_size;					\
    b = eb_new_buffer + eb_b;						\
    begalt = eb_new_buffer + eb_begalt;					\
    if (fixup_alt_jump)							\
      fixup_alt_jump = eb_new_buffer + eb_fixup;			\
    if (laststart)							\
      laststart = eb_new_buffer + eb_laststart;				\
    if (pending_exact)							\
      pending_exact = eb_new_buffer + eb_pending;			\
  } while (0)
01640
01641
01642
01643
01644
/* Highest group number for which start_memory/stop_memory commands
   are emitted by the compiler.  */
#define MAX_REGNUM  255

/* Type used to number groups.  */
typedef unsigned regnum_t;

/* Positions inside the compiled pattern are saved as offsets from
   bufp->buffer rather than as pointers, because the buffer may be
   reallocated while it is being filled.  */
typedef long pattern_offset_t;

/* State saved when a group is opened and restored when it closes.  */
typedef struct
{
  pattern_offset_t begalt_offset;      /* offset of `begalt' */
  pattern_offset_t fixup_alt_jump;     /* offset of `fixup_alt_jump' plus
                                          one; 0 means it was null */
  pattern_offset_t inner_group_offset; /* offset of the start_memory
                                          operand patched at close */
  pattern_offset_t laststart_offset;   /* offset of the group's start */
  regnum_t regnum;                     /* number of this group */
} compile_stack_elt_t;

/* Growable stack of the groups currently open.  */
typedef struct
{
  compile_stack_elt_t *stack;
  unsigned size;   /* number of elements allocated */
  unsigned avail;  /* index of the first unused element */
} compile_stack_type;

/* Number of elements the compile stack starts with.  */
#define INIT_COMPILE_STACK_SIZE  32

#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)

/* The element at index `avail', i.e. the next slot to be filled.  */
#define COMPILE_STACK_TOP  (compile_stack.stack[compile_stack.avail])
01685
01686
/* Set the bit for character C in the bitmap that starts at `b'.  C is
   converted to unsigned char before use; the argument is fully
   parenthesised in both places so that any expression may be passed.  */
#define SET_LIST_BIT(c)							\
  (b[((unsigned char) (c)) / BYTEWIDTH]					\
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
01690
01691
01692
/* Read a run of decimal digits from the pattern and accumulate it into
   NUM (an int lvalue the caller initialises to a negative value meaning
   "no number seen").  On exit `c' holds the first non-digit fetched, or
   the last digit if the pattern ended.  NUM is left untouched when no
   digit is present.

   Once NUM exceeds RE_DUP_MAX further digits are consumed but no longer
   accumulated, so an arbitrarily long digit string cannot overflow the
   int; the caller's range check against RE_DUP_MAX then rejects it.  */
#define GET_UNSIGNED_NUMBER(num)					\
  { if (p != pend)							\
     {									\
       PATFETCH (c);							\
       while (ISDIGIT (c))						\
         {								\
           if (num <= RE_DUP_MAX)					\
             {								\
               if (num < 0)						\
                 num = 0;						\
               num = num * 10 + c - '0';				\
             }								\
           if (p == pend)						\
             break;							\
           PATFETCH (c);						\
         }								\
       }								\
    }
01708
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
/* With <wctype.h> available, class names are validated by wctype ().
   CHAR_CLASS_MAX_LENGTH bounds the name buffer used by the compiler.  */
# ifdef CHARCLASS_NAME_MAX
#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
# else
/* Fallback bound when the system does not provide CHARCLASS_NAME_MAX.  */
#  define CHAR_CLASS_MAX_LENGTH 256
# endif

# define IS_CHAR_CLASS(string) wctype (string)
#else
/* Length of the longest name accepted below ("xdigit").  */
# define CHAR_CLASS_MAX_LENGTH  6

/* Nonzero if STRING is one of the twelve class names known here.  */
# define IS_CHAR_CLASS(string)						\
   (STREQ (string, "alnum") || STREQ (string, "alpha")			\
    || STREQ (string, "blank") || STREQ (string, "cntrl")		\
    || STREQ (string, "digit") || STREQ (string, "graph")		\
    || STREQ (string, "lower") || STREQ (string, "print")		\
    || STREQ (string, "punct") || STREQ (string, "space")		\
    || STREQ (string, "upper") || STREQ (string, "xdigit"))
#endif
01732
01733 #ifndef MATCH_MAY_ALLOCATE
01734
01735
01736
01737
01738
01739
01740
01741
01742 static fail_stack_type fail_stack;
01743
01744
01745
01746
01747 static int regs_allocated_size;
01748
01749 static const char ** regstart, ** regend;
01750 static const char ** old_regstart, ** old_regend;
01751 static const char **best_regstart, **best_regend;
01752 static register_info_type *reg_info;
01753 static const char **reg_dummy;
01754 static register_info_type *reg_info_dummy;
01755
01756
01757
01758
01759 static
01760 regex_grow_registers (num_regs)
01761 int num_regs;
01762 {
01763 if (num_regs > regs_allocated_size)
01764 {
01765 RETALLOC_IF (regstart, num_regs, const char *);
01766 RETALLOC_IF (regend, num_regs, const char *);
01767 RETALLOC_IF (old_regstart, num_regs, const char *);
01768 RETALLOC_IF (old_regend, num_regs, const char *);
01769 RETALLOC_IF (best_regstart, num_regs, const char *);
01770 RETALLOC_IF (best_regend, num_regs, const char *);
01771 RETALLOC_IF (reg_info, num_regs, register_info_type);
01772 RETALLOC_IF (reg_dummy, num_regs, const char *);
01773 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
01774
01775 regs_allocated_size = num_regs;
01776 }
01777 }
01778
01779 #endif
01780
01781 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
01782 compile_stack,
01783 regnum_t regnum));
01784
01785
01786
01787
01788
01789
01790
01791
01792
01793
01794
01795
01796
01797
01798
01799
01800
01801
01802
01803
/* Release the compile stack, then return VALUE from the enclosing
   function.  Expects a `compile_stack' variable to be in scope.  */
#define FREE_STACK_RETURN(value)					\
  do {									\
    free (compile_stack.stack);						\
    return value;							\
  } while (0)
01806
01807 static reg_errcode_t
01808 regex_compile (const char *pattern,
01809 size_t size,
01810 reg_syntax_t syntax,
01811 struct re_pattern_buffer *bufp)
01812 {
01813
01814
01815
01816 register unsigned char c, c1;
01817
01818
01819 const char *p1;
01820
01821
01822 register unsigned char *b;
01823
01824
01825 compile_stack_type compile_stack;
01826
01827
01828 const char *p = pattern;
01829 const char *pend = pattern + size;
01830
01831
01832 RE_TRANSLATE_TYPE translate = bufp->translate;
01833
01834
01835
01836
01837
01838 unsigned char *pending_exact = 0;
01839
01840
01841
01842
01843 unsigned char *laststart = 0;
01844
01845
01846 unsigned char *begalt;
01847
01848
01849
01850 const char *beg_interval;
01851
01852
01853
01854
01855 unsigned char *fixup_alt_jump = 0;
01856
01857
01858
01859
01860 regnum_t regnum = 0;
01861
01862 #ifdef DEBUG
01863 DEBUG_PRINT1 ("\nCompiling pattern: ");
01864 if (debug)
01865 {
01866 unsigned debug_count;
01867
01868 for (debug_count = 0; debug_count < size; debug_count++)
01869 putchar (pattern[debug_count]);
01870 putchar ('\n');
01871 }
01872 #endif
01873
01874
01875 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01876 if (compile_stack.stack == NULL)
01877 return REG_ESPACE;
01878
01879 compile_stack.size = INIT_COMPILE_STACK_SIZE;
01880 compile_stack.avail = 0;
01881
01882
01883 bufp->syntax = syntax;
01884 bufp->fastmap_accurate = 0;
01885 bufp->not_bol = bufp->not_eol = 0;
01886
01887
01888
01889
01890 bufp->used = 0;
01891
01892
01893 bufp->re_nsub = 0;
01894
01895 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01896
01897 init_syntax_once ();
01898 #endif
01899
01900 if (bufp->allocated == 0)
01901 {
01902 if (bufp->buffer)
01903 {
01904
01905
01906 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01907 }
01908 else
01909 {
01910 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01911 }
01912 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
01913
01914 bufp->allocated = INIT_BUF_SIZE;
01915 }
01916
01917 begalt = b = bufp->buffer;
01918
01919
01920 while (p != pend)
01921 {
01922 PATFETCH (c);
01923
01924 switch (c)
01925 {
01926 case '^':
01927 {
01928 if (
01929 p == pattern + 1
01930
01931 || syntax & RE_CONTEXT_INDEP_ANCHORS
01932
01933 || at_begline_loc_p (pattern, p, syntax))
01934 BUF_PUSH (begline);
01935 else
01936 goto normal_char;
01937 }
01938 break;
01939
01940
01941 case '$':
01942 {
01943 if (
01944 p == pend
01945
01946 || syntax & RE_CONTEXT_INDEP_ANCHORS
01947
01948 || at_endline_loc_p (p, pend, syntax))
01949 BUF_PUSH (endline);
01950 else
01951 goto normal_char;
01952 }
01953 break;
01954
01955
01956 case '+':
01957 case '?':
01958 if ((syntax & RE_BK_PLUS_QM)
01959 || (syntax & RE_LIMITED_OPS))
01960 goto normal_char;
01961 handle_plus:
01962 case '*':
01963
01964 if (!laststart)
01965 {
01966 if (syntax & RE_CONTEXT_INVALID_OPS)
01967 FREE_STACK_RETURN (REG_BADRPT);
01968 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01969 goto normal_char;
01970 }
01971
01972 {
01973
01974 boolean keep_string_p = false;
01975
01976
01977 char zero_times_ok = 0, many_times_ok = 0;
01978
01979
01980
01981
01982
01983
01984 for (;;)
01985 {
01986 zero_times_ok |= c != '+';
01987 many_times_ok |= c != '?';
01988
01989 if (p == pend)
01990 break;
01991
01992 PATFETCH (c);
01993
01994 if (c == '*'
01995 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01996 ;
01997
01998 else if (syntax & RE_BK_PLUS_QM && c == '\\')
01999 {
02000 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02001
02002 PATFETCH (c1);
02003 if (!(c1 == '+' || c1 == '?'))
02004 {
02005 PATUNFETCH;
02006 PATUNFETCH;
02007 break;
02008 }
02009
02010 c = c1;
02011 }
02012 else
02013 {
02014 PATUNFETCH;
02015 break;
02016 }
02017
02018
02019 }
02020
02021
02022
02023 if (!laststart)
02024 break;
02025
02026
02027
02028 if (many_times_ok)
02029 {
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039 assert (p - 1 > pattern);
02040
02041
02042 GET_BUFFER_SPACE (3);
02043
02044
02045
02046
02047
02048
02049 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02050 && zero_times_ok
02051 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02052 && !(syntax & RE_DOT_NEWLINE))
02053 {
02054 STORE_JUMP (jump, b, laststart);
02055 keep_string_p = true;
02056 }
02057 else
02058
02059 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
02060
02061
02062 b += 3;
02063 }
02064
02065
02066
02067 GET_BUFFER_SPACE (3);
02068 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02069 : on_failure_jump,
02070 laststart, b + 3);
02071 pending_exact = 0;
02072 b += 3;
02073
02074 if (!zero_times_ok)
02075 {
02076
02077
02078
02079
02080
02081 GET_BUFFER_SPACE (3);
02082 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
02083 b += 3;
02084 }
02085 }
02086 break;
02087
02088
02089 case '.':
02090 laststart = b;
02091 BUF_PUSH (anychar);
02092 break;
02093
02094
02095 case '[':
02096 {
02097 boolean had_char_class = false;
02098
02099 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02100
02101
02102
02103 GET_BUFFER_SPACE (34);
02104
02105 laststart = b;
02106
02107
02108
02109 BUF_PUSH (*p == '^' ? charset_not : charset);
02110 if (*p == '^')
02111 p++;
02112
02113
02114 p1 = p;
02115
02116
02117 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
02118
02119
02120 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
02121
02122
02123 if ((re_opcode_t) b[-2] == charset_not
02124 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02125 SET_LIST_BIT ('\n');
02126
02127
02128 for (;;)
02129 {
02130 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02131
02132 PATFETCH (c);
02133
02134
02135 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02136 {
02137 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02138
02139 PATFETCH (c1);
02140 SET_LIST_BIT (c1);
02141 continue;
02142 }
02143
02144
02145
02146
02147 if (c == ']' && p != p1 + 1)
02148 break;
02149
02150
02151
02152 if (had_char_class && c == '-' && *p != ']')
02153 FREE_STACK_RETURN (REG_ERANGE);
02154
02155
02156
02157
02158
02159 if (c == '-'
02160 && !(p - 2 >= pattern && p[-2] == '[')
02161 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02162 && *p != ']')
02163 {
02164 reg_errcode_t ret
02165 = compile_range (&p, pend, translate, syntax, b);
02166 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02167 }
02168
02169 else if (p[0] == '-' && p[1] != ']')
02170 {
02171 reg_errcode_t ret;
02172
02173
02174 PATFETCH (c1);
02175
02176 ret = compile_range (&p, pend, translate, syntax, b);
02177 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02178 }
02179
02180
02181
02182
02183 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02184 {
02185 char str[CHAR_CLASS_MAX_LENGTH + 1];
02186
02187 PATFETCH (c);
02188 c1 = 0;
02189
02190
02191 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02192
02193 for (;;)
02194 {
02195 PATFETCH (c);
02196 if (c == ':' || c == ']' || p == pend
02197 || c1 == CHAR_CLASS_MAX_LENGTH)
02198 break;
02199 str[c1++] = c;
02200 }
02201 str[c1] = '\0';
02202
02203
02204
02205
02206 if (c == ':' && *p == ']')
02207 {
02208 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
02209 boolean is_lower = STREQ (str, "lower");
02210 boolean is_upper = STREQ (str, "upper");
02211 wctype_t wt;
02212 int ch;
02213
02214 wt = wctype (str);
02215 if (wt == 0)
02216 FREE_STACK_RETURN (REG_ECTYPE);
02217
02218
02219
02220 PATFETCH (c);
02221
02222 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02223
02224 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
02225 {
02226 if (iswctype (btowc (ch), wt))
02227 SET_LIST_BIT (ch);
02228
02229 if (translate && (is_upper || is_lower)
02230 && (ISUPPER (ch) || ISLOWER (ch)))
02231 SET_LIST_BIT (ch);
02232 }
02233
02234 had_char_class = true;
02235 #else
02236 int ch;
02237 boolean is_alnum = STREQ (str, "alnum");
02238 boolean is_alpha = STREQ (str, "alpha");
02239 boolean is_blank = STREQ (str, "blank");
02240 boolean is_cntrl = STREQ (str, "cntrl");
02241 boolean is_digit = STREQ (str, "digit");
02242 boolean is_graph = STREQ (str, "graph");
02243 boolean is_lower = STREQ (str, "lower");
02244 boolean is_print = STREQ (str, "print");
02245 boolean is_punct = STREQ (str, "punct");
02246 boolean is_space = STREQ (str, "space");
02247 boolean is_upper = STREQ (str, "upper");
02248 boolean is_xdigit = STREQ (str, "xdigit");
02249
02250 if (!IS_CHAR_CLASS (str))
02251 FREE_STACK_RETURN (REG_ECTYPE);
02252
02253
02254
02255 PATFETCH (c);
02256
02257 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02258
02259 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
02260 {
02261
02262
02263 if ( (is_alnum && ISALNUM (ch))
02264 || (is_alpha && ISALPHA (ch))
02265 || (is_blank && ISBLANK (ch))
02266 || (is_cntrl && ISCNTRL (ch)))
02267 SET_LIST_BIT (ch);
02268 if ( (is_digit && ISDIGIT (ch))
02269 || (is_graph && ISGRAPH (ch))
02270 || (is_lower && ISLOWER (ch))
02271 || (is_print && ISPRINT (ch)))
02272 SET_LIST_BIT (ch);
02273 if ( (is_punct && ISPUNCT (ch))
02274 || (is_space && ISSPACE (ch))
02275 || (is_upper && ISUPPER (ch))
02276 || (is_xdigit && ISXDIGIT (ch)))
02277 SET_LIST_BIT (ch);
02278 if ( translate && (is_upper || is_lower)
02279 && (ISUPPER (ch) || ISLOWER (ch)))
02280 SET_LIST_BIT (ch);
02281 }
02282 had_char_class = true;
02283 #endif
02284 }
02285 else
02286 {
02287 c1++;
02288 while (c1--)
02289 PATUNFETCH;
02290 SET_LIST_BIT ('[');
02291 SET_LIST_BIT (':');
02292 had_char_class = false;
02293 }
02294 }
02295 else
02296 {
02297 had_char_class = false;
02298 SET_LIST_BIT (c);
02299 }
02300 }
02301
02302
02303
02304 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
02305 b[-1]--;
02306 b += b[-1];
02307 }
02308 break;
02309
02310
02311 case '(':
02312 if (syntax & RE_NO_BK_PARENS)
02313 goto handle_open;
02314 else
02315 goto normal_char;
02316
02317
02318 case ')':
02319 if (syntax & RE_NO_BK_PARENS)
02320 goto handle_close;
02321 else
02322 goto normal_char;
02323
02324
02325 case '\n':
02326 if (syntax & RE_NEWLINE_ALT)
02327 goto handle_alt;
02328 else
02329 goto normal_char;
02330
02331
02332 case '|':
02333 if (syntax & RE_NO_BK_VBAR)
02334 goto handle_alt;
02335 else
02336 goto normal_char;
02337
02338
02339 case '{':
02340 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
02341 goto handle_interval;
02342 else
02343 goto normal_char;
02344
02345
02346 case '\\':
02347 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02348
02349
02350
02351
02352 PATFETCH_RAW (c);
02353
02354 switch (c)
02355 {
02356 case '(':
02357 if (syntax & RE_NO_BK_PARENS)
02358 goto normal_backslash;
02359
02360 handle_open:
02361 bufp->re_nsub++;
02362 regnum++;
02363
02364 if (COMPILE_STACK_FULL)
02365 {
02366 RETALLOC (compile_stack.stack, compile_stack.size << 1,
02367 compile_stack_elt_t);
02368 if (compile_stack.stack == NULL) return REG_ESPACE;
02369
02370 compile_stack.size <<= 1;
02371 }
02372
02373
02374
02375
02376
02377 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
02378 COMPILE_STACK_TOP.fixup_alt_jump
02379 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
02380 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
02381 COMPILE_STACK_TOP.regnum = regnum;
02382
02383
02384
02385
02386
02387 if (regnum <= MAX_REGNUM)
02388 {
02389 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
02390 BUF_PUSH_3 (start_memory, regnum, 0);
02391 }
02392
02393 compile_stack.avail++;
02394
02395 fixup_alt_jump = 0;
02396 laststart = 0;
02397 begalt = b;
02398
02399
02400
02401 pending_exact = 0;
02402 break;
02403
02404
02405 case ')':
02406 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
02407
02408 if (COMPILE_STACK_EMPTY) {
02409 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02410 goto normal_backslash;
02411 else
02412 FREE_STACK_RETURN (REG_ERPAREN);
02413 }
02414 handle_close:
02415 if (fixup_alt_jump)
02416 {
02417
02418
02419
02420 BUF_PUSH (push_dummy_failure);
02421
02422
02423
02424 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
02425 }
02426
02427
02428 if (COMPILE_STACK_EMPTY) {
02429 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02430 goto normal_char;
02431 else
02432 FREE_STACK_RETURN (REG_ERPAREN);
02433 }
02434
02435
02436 assert (compile_stack.avail != 0);
02437 {
02438
02439
02440
02441 regnum_t this_group_regnum;
02442
02443 compile_stack.avail--;
02444 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
02445 fixup_alt_jump
02446 = COMPILE_STACK_TOP.fixup_alt_jump
02447 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
02448 : 0;
02449 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
02450 this_group_regnum = COMPILE_STACK_TOP.regnum;
02451
02452
02453
02454 pending_exact = 0;
02455
02456
02457
02458 if (this_group_regnum <= MAX_REGNUM)
02459 {
02460 unsigned char *inner_group_loc
02461 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
02462
02463 *inner_group_loc = regnum - this_group_regnum;
02464 BUF_PUSH_3 (stop_memory, this_group_regnum,
02465 regnum - this_group_regnum);
02466 }
02467 }
02468 break;
02469
02470
02471 case '|':
02472 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
02473 goto normal_backslash;
02474 handle_alt:
02475 if (syntax & RE_LIMITED_OPS)
02476 goto normal_char;
02477
02478
02479
02480 GET_BUFFER_SPACE (3);
02481 INSERT_JUMP (on_failure_jump, begalt, b + 6);
02482 pending_exact = 0;
02483 b += 3;
02484
02485
02486
02487
02488
02489
02490
02491
02492
02493
02494
02495
02496
02497
02498
02499
02500
02501 if (fixup_alt_jump)
02502 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02503
02504
02505
02506
02507 fixup_alt_jump = b;
02508 GET_BUFFER_SPACE (3);
02509 b += 3;
02510
02511 laststart = 0;
02512 begalt = b;
02513 break;
02514
02515
02516 case '{':
02517
02518 if (!(syntax & RE_INTERVALS)
02519
02520
02521 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
02522 || (p - 2 == pattern && p == pend))
02523 goto normal_backslash;
02524
02525 handle_interval:
02526 {
02527
02528
02529
02530 int lower_bound = -1, upper_bound = -1;
02531
02532 beg_interval = p - 1;
02533
02534 if (p == pend)
02535 {
02536 if (syntax & RE_NO_BK_BRACES)
02537 goto unfetch_interval;
02538 else
02539 FREE_STACK_RETURN (REG_EBRACE);
02540 }
02541
02542 GET_UNSIGNED_NUMBER (lower_bound);
02543
02544 if (c == ',')
02545 {
02546 GET_UNSIGNED_NUMBER (upper_bound);
02547 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
02548 }
02549 else
02550
02551 upper_bound = lower_bound;
02552
02553 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
02554 || lower_bound > upper_bound)
02555 {
02556 if (syntax & RE_NO_BK_BRACES)
02557 goto unfetch_interval;
02558 else
02559 FREE_STACK_RETURN (REG_BADBR);
02560 }
02561
02562 if (!(syntax & RE_NO_BK_BRACES))
02563 {
02564 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
02565
02566 PATFETCH (c);
02567 }
02568
02569 if (c != '}')
02570 {
02571 if (syntax & RE_NO_BK_BRACES)
02572 goto unfetch_interval;
02573 else
02574 FREE_STACK_RETURN (REG_BADBR);
02575 }
02576
02577
02578
02579
02580 if (!laststart)
02581 {
02582 if (syntax & RE_CONTEXT_INVALID_OPS)
02583 FREE_STACK_RETURN (REG_BADRPT);
02584 else if (syntax & RE_CONTEXT_INDEP_OPS)
02585 laststart = b;
02586 else
02587 goto unfetch_interval;
02588 }
02589
02590
02591
02592
02593 if (upper_bound == 0)
02594 {
02595 GET_BUFFER_SPACE (3);
02596 INSERT_JUMP (jump, laststart, b + 3);
02597 b += 3;
02598 }
02599
02600
02601
02602
02603
02604
02605
02606
02607
02608
02609 else
02610 {
02611
02612 unsigned nbytes = 10 + (upper_bound > 1) * 10;
02613
02614 GET_BUFFER_SPACE (nbytes);
02615
02616
02617
02618
02619
02620
02621 INSERT_JUMP2 (succeed_n, laststart,
02622 b + 5 + (upper_bound > 1) * 5,
02623 lower_bound);
02624 b += 5;
02625
02626
02627
02628
02629
02630 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
02631 b += 5;
02632
02633 if (upper_bound > 1)
02634 {
02635
02636
02637
02638
02639
02640
02641 STORE_JUMP2 (jump_n, b, laststart + 5,
02642 upper_bound - 1);
02643 b += 5;
02644
02645
02646
02647
02648
02649
02650
02651
02652
02653
02654
02655
02656
02657
02658
02659 insert_op2 (set_number_at, laststart, b - laststart,
02660 upper_bound - 1, b);
02661 b += 5;
02662 }
02663 }
02664 pending_exact = 0;
02665 beg_interval = NULL;
02666 }
02667 break;
02668
02669 unfetch_interval:
02670
02671 assert (beg_interval);
02672 p = beg_interval;
02673 beg_interval = NULL;
02674
02675
02676 PATFETCH (c);
02677
02678 if (!(syntax & RE_NO_BK_BRACES))
02679 {
02680 if (p > pattern && p[-1] == '\\')
02681 goto normal_backslash;
02682 }
02683 goto normal_char;
02684
02685 #ifdef emacs
02686
02687
02688 case '=':
02689 BUF_PUSH (at_dot);
02690 break;
02691
02692 case 's':
02693 laststart = b;
02694 PATFETCH (c);
02695 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02696 break;
02697
02698 case 'S':
02699 laststart = b;
02700 PATFETCH (c);
02701 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02702 break;
02703 #endif
02704
02705
02706 case 'w':
02707 if (re_syntax_options & RE_NO_GNU_OPS)
02708 goto normal_char;
02709 laststart = b;
02710 BUF_PUSH (wordchar);
02711 break;
02712
02713
02714 case 'W':
02715 if (re_syntax_options & RE_NO_GNU_OPS)
02716 goto normal_char;
02717 laststart = b;
02718 BUF_PUSH (notwordchar);
02719 break;
02720
02721
02722 case '<':
02723 if (re_syntax_options & RE_NO_GNU_OPS)
02724 goto normal_char;
02725 BUF_PUSH (wordbeg);
02726 break;
02727
02728 case '>':
02729 if (re_syntax_options & RE_NO_GNU_OPS)
02730 goto normal_char;
02731 BUF_PUSH (wordend);
02732 break;
02733
02734 case 'b':
02735 if (re_syntax_options & RE_NO_GNU_OPS)
02736 goto normal_char;
02737 BUF_PUSH (wordbound);
02738 break;
02739
02740 case 'B':
02741 if (re_syntax_options & RE_NO_GNU_OPS)
02742 goto normal_char;
02743 BUF_PUSH (notwordbound);
02744 break;
02745
02746 case '`':
02747 if (re_syntax_options & RE_NO_GNU_OPS)
02748 goto normal_char;
02749 BUF_PUSH (begbuf);
02750 break;
02751
02752 case '\'':
02753 if (re_syntax_options & RE_NO_GNU_OPS)
02754 goto normal_char;
02755 BUF_PUSH (endbuf);
02756 break;
02757
02758 case '1': case '2': case '3': case '4': case '5':
02759 case '6': case '7': case '8': case '9':
02760 if (syntax & RE_NO_BK_REFS)
02761 goto normal_char;
02762
02763 c1 = c - '0';
02764
02765 if (c1 > regnum)
02766 FREE_STACK_RETURN (REG_ESUBREG);
02767
02768
02769 if (group_in_compile_stack (compile_stack, (regnum_t) c1))
02770 goto normal_char;
02771
02772 laststart = b;
02773 BUF_PUSH_2 (duplicate, c1);
02774 break;
02775
02776
02777 case '+':
02778 case '?':
02779 if (syntax & RE_BK_PLUS_QM)
02780 goto handle_plus;
02781 else
02782 goto normal_backslash;
02783
02784 default:
02785 normal_backslash:
02786
02787
02788
02789 c = TRANSLATE (c);
02790 goto normal_char;
02791 }
02792 break;
02793
02794
02795 default:
02796
02797 normal_char:
02798
02799 if (!pending_exact
02800
02801
02802 || pending_exact + *pending_exact + 1 != b
02803
02804
02805 || *pending_exact == (1 << BYTEWIDTH) - 1
02806
02807
02808 || *p == '*' || *p == '^'
02809 || ((syntax & RE_BK_PLUS_QM)
02810 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02811 : (*p == '+' || *p == '?'))
02812 || ((syntax & RE_INTERVALS)
02813 && ((syntax & RE_NO_BK_BRACES)
02814 ? *p == '{'
02815 : (p[0] == '\\' && p[1] == '{'))))
02816 {
02817
02818
02819 laststart = b;
02820
02821 BUF_PUSH_2 (exactn, 0);
02822 pending_exact = b - 1;
02823 }
02824
02825 BUF_PUSH (c);
02826 (*pending_exact)++;
02827 break;
02828 }
02829 }
02830
02831
02832
02833
02834 if (fixup_alt_jump)
02835 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02836
02837 if (!COMPILE_STACK_EMPTY)
02838 FREE_STACK_RETURN (REG_EPAREN);
02839
02840
02841
02842 if (syntax & RE_NO_POSIX_BACKTRACKING)
02843 BUF_PUSH (succeed);
02844
02845 free (compile_stack.stack);
02846
02847
02848 bufp->used = b - bufp->buffer;
02849
02850 #ifdef DEBUG
02851 if (debug)
02852 {
02853 DEBUG_PRINT1 ("\nCompiled pattern: \n");
02854 print_compiled_pattern (bufp);
02855 }
02856 #endif
02857
02858 #ifndef MATCH_MAY_ALLOCATE
02859
02860
02861
02862 {
02863 int num_regs = bufp->re_nsub + 1;
02864
02865
02866
02867
02868 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
02869 {
02870 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
02871
02872 #ifdef emacs
02873 if (! fail_stack.stack)
02874 fail_stack.stack
02875 = (fail_stack_elt_t *) xmalloc (fail_stack.size
02876 * sizeof (fail_stack_elt_t));
02877 else
02878 fail_stack.stack
02879 = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
02880 (fail_stack.size
02881 * sizeof (fail_stack_elt_t)));
02882 #else
02883 if (! fail_stack.stack)
02884 fail_stack.stack
02885 = (fail_stack_elt_t *) malloc (fail_stack.size
02886 * sizeof (fail_stack_elt_t));
02887 else
02888 fail_stack.stack
02889 = (fail_stack_elt_t *) realloc (fail_stack.stack,
02890 (fail_stack.size
02891 * sizeof (fail_stack_elt_t)));
02892 #endif
02893 }
02894
02895 regex_grow_registers (num_regs);
02896 }
02897 #endif
02898
02899 return REG_NOERROR;
02900 }
02901
02902
02903
02904
02905
02906 static void
02907 store_op1 (re_opcode_t op,
02908 unsigned char *loc,
02909 int arg)
02910 {
02911 *loc = (unsigned char) op;
02912 STORE_NUMBER (loc + 1, arg);
02913 }
02914
02915
02916
02917
02918 static void
02919 store_op2(re_opcode_t op,
02920 unsigned char *loc,
02921 int arg1,
02922 int arg2)
02923 {
02924 *loc = (unsigned char) op;
02925 STORE_NUMBER (loc + 1, arg1);
02926 STORE_NUMBER (loc + 3, arg2);
02927 }
02928
02929
02930
02931
02932
02933 static void
02934 insert_op1(re_opcode_t op,
02935 unsigned char *loc,
02936 int arg,
02937 unsigned char *end)
02938 {
02939 register unsigned char *pfrom = end;
02940 register unsigned char *pto = end + 3;
02941
02942 while (pfrom != loc)
02943 *--pto = *--pfrom;
02944
02945 store_op1 (op, loc, arg);
02946 }
02947
02948
02949
02950
02951 static void
02952 insert_op2(re_opcode_t op,
02953 unsigned char *loc,
02954 int arg1,
02955 int arg2,
02956 unsigned char *end)
02957 {
02958 register unsigned char *pfrom = end;
02959 register unsigned char *pto = end + 5;
02960
02961 while (pfrom != loc)
02962 *--pto = *--pfrom;
02963
02964 store_op2 (op, loc, arg1, arg2);
02965 }
02966
02967
02968
02969
02970
02971
02972 static boolean
02973 at_begline_loc_p(const char *pattern,
02974 const char *p,
02975 reg_syntax_t syntax)
02976 {
02977 const char *prev = p - 2;
02978 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02979
02980 return
02981
02982 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
02983
02984 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
02985 }
02986
02987
02988
02989
02990
02991 static boolean
02992 at_endline_loc_p(const char *p,
02993 const char *pend,
02994 reg_syntax_t syntax)
02995 {
02996 const char *next = p;
02997 boolean next_backslash = *next == '\\';
02998 const char *next_next = p + 1 < pend ? p + 1 : 0;
02999
03000 return
03001
03002 (syntax & RE_NO_BK_PARENS ? *next == ')'
03003 : next_backslash && next_next && *next_next == ')')
03004
03005 || (syntax & RE_NO_BK_VBAR ? *next == '|'
03006 : next_backslash && next_next && *next_next == '|');
03007 }
03008
03009
03010
03011
03012
03013 static boolean
03014 group_in_compile_stack(compile_stack_type compile_stack,
03015 regnum_t regnum)
03016 {
03017 int this_element;
03018
03019 for (this_element = compile_stack.avail - 1;
03020 this_element >= 0;
03021 this_element--)
03022 if (compile_stack.stack[this_element].regnum == regnum)
03023 return true;
03024
03025 return false;
03026 }
03027
03028
03029
03030
03031
03032
03033
03034
03035
03036
03037
03038
03039
03040 static reg_errcode_t
03041 compile_range(const char **p_ptr,
03042 const char *pend,
03043 RE_TRANSLATE_TYPE translate,
03044 reg_syntax_t syntax,
03045 unsigned char *b)
03046 {
03047 unsigned this_char;
03048
03049 const char *p = *p_ptr;
03050 unsigned int range_start, range_end;
03051
03052 if (p == pend)
03053 return REG_ERANGE;
03054
03055
03056
03057
03058
03059
03060
03061
03062
03063 range_start = ((const unsigned char *) p)[-2];
03064 range_end = ((const unsigned char *) p)[0];
03065
03066
03067
03068 (*p_ptr)++;
03069
03070
03071 if (range_start > range_end)
03072 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
03073
03074
03075
03076
03077
03078 for (this_char = range_start; this_char <= range_end; this_char++)
03079 {
03080 SET_LIST_BIT (TRANSLATE (this_char));
03081 }
03082
03083 return REG_NOERROR;
03084 }
03085
03086
03087
03088
03089
03090 /* re_compile_fastmap: fill BUFP->fastmap, a table of (1 << BYTEWIDTH) entries, */
03091 /* with a nonzero entry for each character that can begin a match of the */
03092 /* compiled pattern held in BUFP->buffer (BUFP->used bytes long). */
03093 /* BUFP->fastmap and BUFP->buffer must be non-NULL (asserted below). */
03094 /* Side effects: sets BUFP->fastmap_accurate to 1, and sets BUFP->can_be_null */
03095 /* when some path through the pattern ends without passing an opcode that */
03096 /* records starting characters (or when a `duplicate' opcode is met). */
03097 /* Returns 0 normally, or -2 if an alternative could not be pushed on the */
03098 /* failure stack (PUSH_PATTERN_OP failed). */
03099 int
03100 re_compile_fastmap(struct re_pattern_buffer *bufp)
03101 {
03102 int j, k;
03103 #ifdef MATCH_MAY_ALLOCATE
03104 fail_stack_type fail_stack;
03105 #endif
03106 #ifndef REGEX_MALLOC
03107 char *destination; /* NOTE(review): not referenced directly here; presumably used by the stack macros -- confirm. */
03108 #endif
03109
03110 register char *fastmap = bufp->fastmap;
03111 unsigned char *pattern = bufp->buffer;
03112 unsigned char *p = pattern;
03113 register unsigned char *pend = pattern + bufp->used;
03114
03115 #ifdef REL_ALLOC
03116 /* NOTE(review): failure_stack_ptr is not referenced directly in this function; */
03117 /* presumably a stack macro needs it when REL_ALLOC is defined -- confirm. */
03118 fail_stack_elt_t *failure_stack_ptr;
03119 #endif
03120
03121 /* True while the path currently being walked has not yet reached an opcode */
03122 /* that records starting characters.  It is folded into bufp->can_be_null */
03123 /* each time a path ends. */
03124
03125 boolean path_can_be_null = true;
03126
03127 /* Set by the succeed_n case so the shared on_failure_jump code also skips the count. */
03128 boolean succeed_n_p = false;
03129
03130 assert (fastmap != NULL && p != NULL);
03131
03132 INIT_FAIL_STACK ();
03133 bzero (fastmap, 1 << BYTEWIDTH);
03134 bufp->fastmap_accurate = 1;
03135 bufp->can_be_null = 0;
03136
03137 while (1)
03138 {
03139 if (p == pend || *p == succeed)
03140 {
03141 /* This path is finished: resume at a saved alternative if one is pending. */
03142 if (!FAIL_STACK_EMPTY ())
03143 {
03144 bufp->can_be_null |= path_can_be_null;
03145
03146 /* The next path starts out as possibly matching the empty string. */
03147 path_can_be_null = true;
03148
03149 p = fail_stack.stack[--fail_stack.avail].pointer;
03150
03151 continue;
03152 }
03153 else
03154 break;
03155 }
03156
03157 /* An opcode must remain to be read at this point. */
03158 assert (p < pend);
03159
03160 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
03161 {
03162
03163
03164 /* duplicate: no starting characters are recorded for it.  The pattern is */
03165 /* marked as possibly matching the empty string and the scan stops, leaving */
03166 /* the fastmap as filled so far. */
03167
03168 case duplicate:
03169 bufp->can_be_null = 1;
03170 goto done;
03171
03172
03173 /* exactn: p[0] is the count byte (the matcher reads it with `mcnt = *p++'), */
03174 /* so p[1] is the first literal character. */
03175
03176 case exactn:
03177 fastmap[p[1]] = 1;
03178 break;
03179
03180 /* charset: *p is the bitmap length in bytes; mark each character whose bit is set. */
03181 case charset:
03182 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03183 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
03184 fastmap[j] = 1;
03185 break;
03186
03187
03188 case charset_not:
03189 /* Characters beyond the end of the bitmap are not excluded by it, so mark them. */
03190 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
03191 fastmap[j] = 1;
03192 /* Within the bitmap, mark the characters whose bit is clear. */
03193 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03194 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
03195 fastmap[j] = 1;
03196 break;
03197
03198 /* wordchar / notwordchar: mark characters by whether SYNTAX (j) is Sword. */
03199 case wordchar:
03200 for (j = 0; j < (1 << BYTEWIDTH); j++)
03201 if (SYNTAX (j) == Sword)
03202 fastmap[j] = 1;
03203 break;
03204
03205
03206 case notwordchar:
03207 for (j = 0; j < (1 << BYTEWIDTH); j++)
03208 if (SYNTAX (j) != Sword)
03209 fastmap[j] = 1;
03210 break;
03211
03212
03213 case anychar:
03214 {
03215 int fastmap_newline = fastmap['\n'];
03216
03217 /* Mark every character ... */
03218 for (j = 0; j < (1 << BYTEWIDTH); j++)
03219 fastmap[j] = 1;
03220
03221 /* ... but restore the newline entry unless RE_DOT_NEWLINE is set. */
03222 if (!(bufp->syntax & RE_DOT_NEWLINE))
03223 fastmap['\n'] = fastmap_newline;
03224
03225 /* With RE_DOT_NEWLINE set the map is now completely full; if can_be_null */
03226 /* is already set as well, stop scanning. */
03227 else if (bufp->can_be_null)
03228 goto done;
03229
03230 /* Otherwise end this path like the other character-matching opcodes. */
03231 break;
03232 }
03233
03234 #ifdef emacs
03235 case syntaxspec:
03236 k = *p++;
03237 for (j = 0; j < (1 << BYTEWIDTH); j++)
03238 if (SYNTAX (j) == (enum syntaxcode) k)
03239 fastmap[j] = 1;
03240 break;
03241
03242
03243 case notsyntaxspec:
03244 k = *p++;
03245 for (j = 0; j < (1 << BYTEWIDTH); j++)
03246 if (SYNTAX (j) != (enum syntaxcode) k)
03247 fastmap[j] = 1;
03248 break;
03249
03250
03251
03252 /* These opcodes record nothing in the fastmap and have no operand bytes */
03253 /* to skip here: keep scanning the same path. */
03254
03255 case before_dot:
03256 case at_dot:
03257 case after_dot:
03258 continue;
03259 #endif
03260
03261 /* Opcodes with nothing to record and no operand bytes to skip: keep scanning. */
03262 case no_op:
03263 case begline:
03264 case endline:
03265 case begbuf:
03266 case endbuf:
03267 case wordbound:
03268 case notwordbound:
03269 case wordbeg:
03270 case wordend:
03271 case push_dummy_failure:
03272 continue;
03273
03274 /* Jump family: read the jump offset into j and follow it. */
03275 case jump_n:
03276 case pop_failure_jump:
03277 case maybe_pop_jump:
03278 case jump:
03279 case jump_past_alt:
03280 case dummy_failure_jump:
03281 EXTRACT_NUMBER_AND_INCR (j, p);
03282 p += j;
03283 if (j > 0)
03284 continue;
03285
03286
03287 /* j <= 0: a backward (or zero) jump.  If it lands on on_failure_jump or */
03288 /* succeed_n, step over that opcode by following its own offset as well; */
03289 /* otherwise just continue from the jump target. */
03290
03291 if ((re_opcode_t) *p != on_failure_jump
03292 && (re_opcode_t) *p != succeed_n)
03293 continue;
03294
03295 p++;
03296 EXTRACT_NUMBER_AND_INCR (j, p);
03297 p += j;
03298
03299 /* If the top of the failure stack is exactly this position, drop it. */
03300 if (!FAIL_STACK_EMPTY ()
03301 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
03302 fail_stack.avail--;
03303
03304 continue;
03305
03306
03307 case on_failure_jump:
03308 case on_failure_keep_string_jump:
03309 handle_on_failure_jump:
03310 EXTRACT_NUMBER_AND_INCR (j, p);
03311
03312
03313 /* Save the jump target as an alternative path to scan later, unless it */
03314 /* lies at or beyond the end of the pattern.  Nothing is left to scan */
03315 /* there, and resuming at such a point would make the switch read past */
03316 /* pend, so the pattern is instead recorded as able to match the empty */
03317 /* string. */
03318
03319 if (p + j < pend)
03320 {
03321 if (!PUSH_PATTERN_OP (p + j, fail_stack))
03322 {
03323 RESET_FAIL_STACK ();
03324 return -2;
03325 }
03326 }
03327 else
03328 bufp->can_be_null = 1;
03329
03330 if (succeed_n_p)
03331 {
03332 EXTRACT_NUMBER_AND_INCR (k, p); /* Entered from succeed_n: step over its count. */
03333 succeed_n_p = false;
03334 }
03335
03336 continue;
03337
03338
03339 case succeed_n:
03340 /* Skip the two offset bytes to reach the repeat count. */
03341 p += 2;
03342
03343 /* Read the count; if it is zero, rewind to just after the opcode and handle as on_failure_jump. */
03344 EXTRACT_NUMBER_AND_INCR (k, p);
03345 if (k == 0)
03346 {
03347 p -= 4;
03348 succeed_n_p = true;
03349 goto handle_on_failure_jump;
03350 }
03351 continue;
03352
03353
03354 case set_number_at:
03355 p += 4; /* Skip the four operand bytes. */
03356 continue;
03357
03358
03359 case start_memory:
03360 case stop_memory:
03361 p += 2; /* Skip the two operand bytes. */
03362 continue;
03363
03364
03365 default:
03366 abort ();
03367 }
03368
03369
03370 /* Reached only by `break' from a case that recorded starting characters: */
03371 /* this path cannot match the empty string and need not be followed any */
03372 /* further.  Setting p to pend makes the test at the top of the loop pick */
03373 /* up the next saved alternative, or finish if none remains. */
03374
03375 path_can_be_null = false;
03376 p = pend;
03377 }
03378
03379 /* Account for the last path walked (the only path when the pattern is empty). */
03380
03381 bufp->can_be_null |= path_can_be_null;
03382
03383 done:
03384 RESET_FAIL_STACK ();
03385 return 0;
03386 }
03387
03388
03389
03390
03391
03392
03393
03394
03395
03396
03397
03398
03399
03400
03401 void
03402 re_set_registers(struct re_pattern_buffer *bufp,
03403 struct re_registers *regs,
03404 unsigned num_regs,
03405 regoff_t *starts,
03406 regoff_t *ends)
03407 {
03408 if (num_regs)
03409 {
03410 bufp->regs_allocated = REGS_REALLOCATE;
03411 regs->num_regs = num_regs;
03412 regs->start = starts;
03413 regs->end = ends;
03414 }
03415 else
03416 {
03417 bufp->regs_allocated = REGS_UNALLOCATED;
03418 regs->num_regs = 0;
03419 regs->start = regs->end = (regoff_t *) 0;
03420 }
03421 }
03422
03423
03424
03425
03426
03427
/* Search for the pattern in BUFP within the single string STRING of
   length SIZE.  This is re_search_2 with an empty first string and the
   whole of STRING as the second one; matching may extend to the end of
   STRING (the stop position passed on is SIZE).

   STARTPOS, RANGE and REGS are passed through unchanged, and the return
   value is whatever re_search_2 returns.  */
int
re_search(struct re_pattern_buffer *bufp,
          const char *string,
          int size,
          int startpos,
          int range,
          struct re_registers *regs)
{
  const char *const empty_first = NULL;
  const int empty_first_size = 0;
  const int stop = size;

  return re_search_2 (bufp, empty_first, empty_first_size,
                      string, size,
                      startpos, range, regs, stop);
}
03439
03440
03441
03442
03443
03444
03445
03446
03447
03448
03449
03450
03451
03452
03453
03454
03455
03456
03457
03458
03459
03460
03461
03462 int
03463 re_search_2(struct re_pattern_buffer *bufp,
03464 const char *string1,
03465 int size1,
03466 const char *string2,
03467 int size2,
03468 int startpos,
03469 int range,
03470 struct re_registers *regs,
03471 int stop)
03472 {
03473 int val;
03474 register char *fastmap = bufp->fastmap;
03475 register RE_TRANSLATE_TYPE translate = bufp->translate;
03476 int total_size = size1 + size2;
03477 int endpos = startpos + range;
03478
03479
03480 if (startpos < 0 || startpos > total_size)
03481 return -1;
03482
03483
03484
03485
03486 if (endpos < 0)
03487 range = 0 - startpos;
03488 else if (endpos > total_size)
03489 range = total_size - startpos;
03490
03491
03492
03493 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03494 {
03495 if (startpos > 0)
03496 return -1;
03497 else
03498 range = 1;
03499 }
03500
03501 #ifdef emacs
03502
03503
03504 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
03505 {
03506 range = PT - startpos;
03507 if (range <= 0)
03508 return -1;
03509 }
03510 #endif
03511
03512
03513 if (fastmap && !bufp->fastmap_accurate)
03514 if (re_compile_fastmap (bufp) == -2)
03515 return -2;
03516
03517
03518 for (;;)
03519 {
03520
03521
03522
03523
03524 if (fastmap && startpos < total_size && !bufp->can_be_null)
03525 {
03526 if (range > 0)
03527 {
03528 register const char *d;
03529 register int lim = 0;
03530 int irange = range;
03531
03532 if (startpos < size1 && startpos + range >= size1)
03533 lim = range - (size1 - startpos);
03534
03535 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03536
03537
03538
03539 if (translate)
03540 while (range > lim
03541 && !fastmap[(unsigned char)
03542 translate[(unsigned char) *d++]])
03543 range--;
03544 else
03545 while (range > lim && !fastmap[(unsigned char) *d++])
03546 range--;
03547
03548 startpos += irange - range;
03549 }
03550 else
03551 {
03552 register char c = (size1 == 0 || startpos >= size1
03553 ? string2[startpos - size1]
03554 : string1[startpos]);
03555
03556 if (!fastmap[(unsigned char) TRANSLATE (c)])
03557 goto advance;
03558 }
03559 }
03560
03561
03562 if (range >= 0 && startpos == total_size && fastmap
03563 && !bufp->can_be_null)
03564 return -1;
03565
03566 val = re_match_2_internal (bufp, string1, size1, string2, size2,
03567 startpos, regs, stop);
03568 #ifndef REGEX_MALLOC
03569 #ifdef C_ALLOCA
03570 alloca (0);
03571 #endif
03572 #endif
03573
03574 if (val >= 0)
03575 return startpos;
03576
03577 if (val == -2)
03578 return -2;
03579
03580 advance:
03581 if (!range)
03582 break;
03583 else if (range > 0)
03584 {
03585 range--;
03586 startpos++;
03587 }
03588 else
03589 {
03590 range++;
03591 startpos--;
03592 }
03593 }
03594 return -1;
03595 }
03596
03597
03598 /* Helpers for re_match_2_internal; they use that function's locals (string1, d, dend, ...) by name. */
03599 #define POINTER_TO_OFFSET(ptr) \
03600 (FIRST_STRING_P (ptr) \
03601 ? ((regoff_t) ((ptr) - string1)) \
03602 : ((regoff_t) ((ptr) - string2 + size1)))
03603
03604 /* True when the current data end `dend' is end_match_1, i.e. the matcher */
03605 /* is still working inside the first string. */
03606 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
03607
03608 /* Make sure `d' points at a character that may be read: when d has reached */
03609 /* dend, either fail (already at the end of string2's part) or move on to string2. */
03610 #define PREFETCH() \
03611 while (d == dend) \
03612 { \
03613 /* End of the second string's matchable part: no data left. */ \
03614 if (dend == end_match_2) \
03615 goto fail; \
03616 /* End of the first string's part: continue in string2. */ \
03617 d = string2; \
03618 dend = end_match_2; \
03619 }
03620
03621 /* AT_STRINGS_BEG: D is at the start of the first non-empty string, or */
03622 /* size2 is zero.  AT_STRINGS_END: D is at end2, the end of string2. */
03623
03624 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
03625 #define AT_STRINGS_END(d) ((d) == end2)
03626
03627
03628 /* WORDCHAR_P: true if the character at D has syntax class Sword.  D may sit */
03629 /* on the boundary between the two strings: D == end1 reads the first */
03630 /* character of string2, and D == string2 - 1 reads the last character of */
03631 /* string1. */
03632 #define WORDCHAR_P(d) \
03633 (SYNTAX ((d) == end1 ? *string2 \
03634 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
03635 == Sword)
03636
03637 /* Disabled with #if 0: AT_WORD_BOUNDARY is never defined. */
03638 #if 0
03639 /* Would test for a string edge or a change of word-ness between the */
03640 /* character before D and the character at D. */
03641 #define AT_WORD_BOUNDARY(d) \
03642 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
03643 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
03644 #endif
03645
03646
/* Release everything the matcher allocated.  FREE_VARIABLES expands in the
   scope of the matcher and names its locals (fail_stack, regstart, ...)
   directly.  */
#ifdef MATCH_MAY_ALLOCATE
/* Free VAR if it is non-null, then set it to NULL.  VAR must be a
   modifiable lvalue of pointer type.  The cast to `void *' is applied only
   to the value handed to REGEX_FREE: a cast expression is not an lvalue in
   ISO C, so VAR itself must be passed (and assigned) uncast.  Wrapped in
   do/while so the macro is a single statement.  */
#define FREE_VAR(var)                                                   \
  do {                                                                  \
    if (var)                                                            \
      REGEX_FREE ((void *) (var));                                      \
    (var) = NULL;                                                       \
  } while (0)
#define FREE_VARIABLES()                                                \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
#else
/* Nothing to release when MATCH_MAY_ALLOCATE is not defined.  */
#define FREE_VARIABLES() ((void)0)
#endif
03665
03666
03667
03668 /* Sentinel values stored in the matcher's highest_active_reg and */
03669 /* lowest_active_reg when no group is active (see their initialisation in */
03670 /* re_match_2_internal).  Register numbers are read there from single pattern */
03671 /* bytes, so presumably 1 << BYTEWIDTH lies above every real register */
03672 /* number -- TODO confirm BYTEWIDTH is the width of a pattern byte. */
03673 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
03674 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03675
03676
03677
#ifndef emacs
/* Match the pattern in BUFP against the single string STRING of length
   SIZE, starting at offset POS.  Equivalent to re_match_2 with an empty
   first string and SIZE as the stop position.  Compiled only when
   `emacs' is not defined.

   Returns what re_match_2_internal returns.  In the part of that
   function visible nearby, that is the length of the match on success,
   -1 when POS is out of range, and -2 when an allocation fails.  */
int
re_match(struct re_pattern_buffer *bufp,
         const char *string,
         int size,
         int pos,
         struct re_registers *regs)
{
  int match_length;

  match_length = re_match_2_internal (bufp, NULL, 0, string, size,
                                      pos, regs, size);
#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)
  /* Result deliberately ignored; presumably this lets the C alloca
     emulation reclaim storage -- confirm against the alloca in use.  */
  alloca (0);
#endif
  return match_length;
}
#endif /* not emacs */
03698 /* Forward declarations of static helpers defined outside this part of the file.  The first is called from the start_memory case and the last from the duplicate case of re_match_2_internal. */
03699 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
03700 unsigned char *end,
03701 register_info_type *reg_info));
03702 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
03703 unsigned char *end,
03704 register_info_type *reg_info));
03705 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
03706 unsigned char *end,
03707 register_info_type *reg_info));
03708 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
03709 int len, char *translate));
03710
03711
03712
03713
03714
03715
03716
03717
03718
03719
03720
03721
03722
03723
/* Match the pattern in BUFP against the virtual concatenation of STRING1
   (SIZE1 bytes) and STRING2 (SIZE2 bytes), starting at offset POS.  REGS
   and STOP are handed to re_match_2_internal unchanged.

   Returns what re_match_2_internal returns.  In the part of that
   function visible nearby, that is the length of the match on success,
   -1 when POS is out of range, and -2 when an allocation fails.  */
int
re_match_2(struct re_pattern_buffer *bufp,
           const char *string1,
           int size1,
           const char *string2,
           int size2,
           int pos,
           struct re_registers *regs,
           int stop)
{
  int match_length;

  match_length = re_match_2_internal (bufp, string1, size1, string2, size2,
                                      pos, regs, stop);
#if !defined (REGEX_MALLOC) && defined (C_ALLOCA)
  /* Result deliberately ignored; presumably this lets the C alloca
     emulation reclaim storage -- confirm against the alloca in use.  */
  alloca (0);
#endif
  return match_length;
}
03743
03744
03745
03746 static int
03747 re_match_2_internal(struct re_pattern_buffer *bufp,
03748 const char *string1,
03749 int size1,
03750 const char *string2,
03751 int size2,
03752 int pos,
03753 struct re_registers *regs,
03754 int stop)
03755 {
03756
03757 int mcnt;
03758 unsigned char *p1;
03759
03760
03761 const char *end1, *end2;
03762
03763
03764
03765 const char *end_match_1, *end_match_2;
03766
03767
03768 const char *d, *dend;
03769
03770
03771 unsigned char *p = bufp->buffer;
03772 register unsigned char *pend = p + bufp->used;
03773
03774
03775
03776 unsigned char *just_past_start_mem = 0;
03777
03778
03779 RE_TRANSLATE_TYPE translate = bufp->translate;
03780
03781
03782
03783
03784
03785
03786
03787
03788
03789
03790 #ifdef MATCH_MAY_ALLOCATE
03791 fail_stack_type fail_stack;
03792 #endif
03793 #ifdef DEBUG
03794 static unsigned failure_id = 0;
03795 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03796 #endif
03797
03798 #ifdef REL_ALLOC
03799
03800
03801 fail_stack_elt_t *failure_stack_ptr;
03802 #endif
03803
03804
03805
03806
03807 size_t num_regs = bufp->re_nsub + 1;
03808
03809
03810 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03811 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03812
03813
03814
03815
03816
03817
03818
03819
03820 #ifdef MATCH_MAY_ALLOCATE
03821 const char **regstart, **regend;
03822 #endif
03823
03824
03825
03826
03827
03828
03829 #ifdef MATCH_MAY_ALLOCATE
03830 const char **old_regstart, **old_regend;
03831 #endif
03832
03833
03834
03835
03836
03837
03838
03839 #ifdef MATCH_MAY_ALLOCATE
03840 register_info_type *reg_info;
03841 #endif
03842
03843
03844
03845
03846
03847 unsigned best_regs_set = false;
03848 #ifdef MATCH_MAY_ALLOCATE
03849 const char **best_regstart, **best_regend;
03850 #endif
03851
03852
03853
03854
03855
03856
03857
03858
03859
03860 const char *match_end = NULL;
03861
03862
03863 int set_regs_matched_done = 0;
03864
03865
03866 #ifdef MATCH_MAY_ALLOCATE
03867 const char **reg_dummy;
03868 register_info_type *reg_info_dummy;
03869 #endif
03870
03871 #ifdef DEBUG
03872
03873 unsigned num_regs_pushed = 0;
03874 #endif
03875
03876 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03877
03878 INIT_FAIL_STACK ();
03879
03880 #ifdef MATCH_MAY_ALLOCATE
03881
03882
03883
03884
03885
03886 if (bufp->re_nsub)
03887 {
03888 regstart = REGEX_TALLOC (num_regs, const char *);
03889 regend = REGEX_TALLOC (num_regs, const char *);
03890 old_regstart = REGEX_TALLOC (num_regs, const char *);
03891 old_regend = REGEX_TALLOC (num_regs, const char *);
03892 best_regstart = REGEX_TALLOC (num_regs, const char *);
03893 best_regend = REGEX_TALLOC (num_regs, const char *);
03894 reg_info = REGEX_TALLOC (num_regs, register_info_type);
03895 reg_dummy = REGEX_TALLOC (num_regs, const char *);
03896 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03897
03898 if (!(regstart && regend && old_regstart && old_regend && reg_info
03899 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03900 {
03901 FREE_VARIABLES ();
03902 return -2;
03903 }
03904 }
03905 else
03906 {
03907
03908
03909 regstart = regend = old_regstart = old_regend = best_regstart
03910 = best_regend = reg_dummy = NULL;
03911 reg_info = reg_info_dummy = (register_info_type *) NULL;
03912 }
03913 #endif
03914
03915
03916 if (pos < 0 || pos > size1 + size2)
03917 {
03918 FREE_VARIABLES ();
03919 return -1;
03920 }
03921
03922
03923
03924
03925 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
03926 {
03927 regstart[mcnt] = regend[mcnt]
03928 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03929
03930 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03931 IS_ACTIVE (reg_info[mcnt]) = 0;
03932 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03933 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03934 }
03935
03936
03937
03938 if (size2 == 0 && string1 != NULL)
03939 {
03940 string2 = string1;
03941 size2 = size1;
03942 string1 = 0;
03943 size1 = 0;
03944 }
03945 end1 = string1 + size1;
03946 end2 = string2 + size2;
03947
03948
03949 if (stop <= size1)
03950 {
03951 end_match_1 = string1 + stop;
03952 end_match_2 = string2;
03953 }
03954 else
03955 {
03956 end_match_1 = end1;
03957 end_match_2 = string2 + stop - size1;
03958 }
03959
03960
03961
03962
03963
03964
03965
03966 if (size1 > 0 && pos <= size1)
03967 {
03968 d = string1 + pos;
03969 dend = end_match_1;
03970 }
03971 else
03972 {
03973 d = string2 + pos - size1;
03974 dend = end_match_2;
03975 }
03976
03977 DEBUG_PRINT1 ("The compiled pattern is:\n");
03978 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03979 DEBUG_PRINT1 ("The string to match is: `");
03980 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03981 DEBUG_PRINT1 ("'\n");
03982
03983
03984
03985
03986 for (;;)
03987 {
03988 #ifdef _LIBC
03989 DEBUG_PRINT2 ("\n%p: ", p);
03990 #else
03991 DEBUG_PRINT2 ("\n0x%x: ", p);
03992 #endif
03993
03994 if (p == pend)
03995 {
03996 DEBUG_PRINT1 ("end of pattern ... ");
03997
03998
03999
04000 if (d != end_match_2)
04001 {
04002
04003
04004 boolean same_str_p = (FIRST_STRING_P (match_end)
04005 == MATCHING_IN_FIRST_STRING);
04006
04007 boolean best_match_p;
04008
04009
04010
04011 if (same_str_p)
04012 best_match_p = d > match_end;
04013 else
04014 best_match_p = !MATCHING_IN_FIRST_STRING;
04015
04016 DEBUG_PRINT1 ("backtracking.\n");
04017
04018 if (!FAIL_STACK_EMPTY ())
04019 {
04020
04021
04022 if (!best_regs_set || best_match_p)
04023 {
04024 best_regs_set = true;
04025 match_end = d;
04026
04027 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
04028
04029 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04030 {
04031 best_regstart[mcnt] = regstart[mcnt];
04032 best_regend[mcnt] = regend[mcnt];
04033 }
04034 }
04035 goto fail;
04036 }
04037
04038
04039
04040
04041 else if (best_regs_set && !best_match_p)
04042 {
04043 restore_best_regs:
04044
04045
04046
04047
04048
04049 DEBUG_PRINT1 ("Restoring best registers.\n");
04050
04051 d = match_end;
04052 dend = ((d >= string1 && d <= end1)
04053 ? end_match_1 : end_match_2);
04054
04055 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04056 {
04057 regstart[mcnt] = best_regstart[mcnt];
04058 regend[mcnt] = best_regend[mcnt];
04059 }
04060 }
04061 }
04062
04063 succeed_label:
04064 DEBUG_PRINT1 ("Accepting match.\n");
04065
04066
04067 if (regs && !bufp->no_sub)
04068 {
04069
04070 if (bufp->regs_allocated == REGS_UNALLOCATED)
04071 {
04072
04073
04074 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
04075 regs->start = TALLOC (regs->num_regs, regoff_t);
04076 regs->end = TALLOC (regs->num_regs, regoff_t);
04077 if (regs->start == NULL || regs->end == NULL)
04078 {
04079 FREE_VARIABLES ();
04080 return -2;
04081 }
04082 bufp->regs_allocated = REGS_REALLOCATE;
04083 }
04084 else if (bufp->regs_allocated == REGS_REALLOCATE)
04085 {
04086
04087
04088 if (regs->num_regs < num_regs + 1)
04089 {
04090 regs->num_regs = num_regs + 1;
04091 RETALLOC (regs->start, regs->num_regs, regoff_t);
04092 RETALLOC (regs->end, regs->num_regs, regoff_t);
04093 if (regs->start == NULL || regs->end == NULL)
04094 {
04095 FREE_VARIABLES ();
04096 return -2;
04097 }
04098 }
04099 }
04100 else
04101 {
04102
04103
04104 assert (bufp->regs_allocated == REGS_FIXED);
04105 }
04106
04107
04108
04109
04110 if (regs->num_regs > 0)
04111 {
04112 regs->start[0] = pos;
04113 regs->end[0] = (MATCHING_IN_FIRST_STRING
04114 ? ((regoff_t) (d - string1))
04115 : ((regoff_t) (d - string2 + size1)));
04116 }
04117
04118
04119
04120 for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
04121 mcnt++)
04122 {
04123 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
04124 regs->start[mcnt] = regs->end[mcnt] = -1;
04125 else
04126 {
04127 regs->start[mcnt]
04128 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
04129 regs->end[mcnt]
04130 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
04131 }
04132 }
04133
04134
04135
04136
04137
04138
04139 for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
04140 regs->start[mcnt] = regs->end[mcnt] = -1;
04141 }
04142
04143 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
04144 nfailure_points_pushed, nfailure_points_popped,
04145 nfailure_points_pushed - nfailure_points_popped);
04146 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
04147
04148 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
04149 ? string1
04150 : string2 - size1);
04151
04152 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
04153
04154 FREE_VARIABLES ();
04155 return mcnt;
04156 }
04157
04158
04159 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
04160 {
04161
04162
04163 case no_op:
04164 DEBUG_PRINT1 ("EXECUTING no_op.\n");
04165 break;
04166
04167 case succeed:
04168 DEBUG_PRINT1 ("EXECUTING succeed.\n");
04169 goto succeed_label;
04170
04171
04172
04173
04174 case exactn:
04175 mcnt = *p++;
04176 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
04177
04178
04179
04180 if (translate)
04181 {
04182 do
04183 {
04184 PREFETCH ();
04185 if ((unsigned char) translate[(unsigned char) *d++]
04186 != (unsigned char) *p++)
04187 goto fail;
04188 }
04189 while (--mcnt);
04190 }
04191 else
04192 {
04193 do
04194 {
04195 PREFETCH ();
04196 if (*d++ != (char) *p++) goto fail;
04197 }
04198 while (--mcnt);
04199 }
04200 SET_REGS_MATCHED ();
04201 break;
04202
04203
04204
04205 case anychar:
04206 DEBUG_PRINT1 ("EXECUTING anychar.\n");
04207
04208 PREFETCH ();
04209
04210 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
04211 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
04212 goto fail;
04213
04214 SET_REGS_MATCHED ();
04215 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
04216 d++;
04217 break;
04218
04219
04220 case charset:
04221 case charset_not:
04222 {
04223 register unsigned char c;
04224 boolean not = (re_opcode_t) *(p - 1) == charset_not;
04225
04226 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
04227
04228 PREFETCH ();
04229 c = TRANSLATE (*d);
04230
04231
04232
04233 if (c < (unsigned) (*p * BYTEWIDTH)
04234 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04235 not = !not;
04236
04237 p += 1 + *p;
04238
04239 if (!not) goto fail;
04240
04241 SET_REGS_MATCHED ();
04242 d++;
04243 break;
04244 }
04245
04246
04247
04248
04249
04250
04251
04252 case start_memory:
04253 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
04254
04255
04256 p1 = p;
04257
04258 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
04259 REG_MATCH_NULL_STRING_P (reg_info[*p])
04260 = group_match_null_string_p (&p1, pend, reg_info);
04261
04262
04263
04264
04265
04266
04267 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04268 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
04269 : regstart[*p];
04270 DEBUG_PRINT2 (" old_regstart: %d\n",
04271 POINTER_TO_OFFSET (old_regstart[*p]));
04272
04273 regstart[*p] = d;
04274 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
04275
04276 IS_ACTIVE (reg_info[*p]) = 1;
04277 MATCHED_SOMETHING (reg_info[*p]) = 0;
04278
04279
04280 set_regs_matched_done = 0;
04281
04282
04283 highest_active_reg = *p;
04284
04285
04286
04287 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04288 lowest_active_reg = *p;
04289
04290
04291 p += 2;
04292 just_past_start_mem = p;
04293
04294 break;
04295
04296
04297
04298
04299
04300 case stop_memory:
04301 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
04302
04303
04304
04305
04306
04307
04308 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04309 ? REG_UNSET (regend[*p]) ? d : regend[*p]
04310 : regend[*p];
04311 DEBUG_PRINT2 (" old_regend: %d\n",
04312 POINTER_TO_OFFSET (old_regend[*p]));
04313
04314 regend[*p] = d;
04315 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
04316
04317
04318 IS_ACTIVE (reg_info[*p]) = 0;
04319
04320
04321 set_regs_matched_done = 0;
04322
04323
04324
04325 if (lowest_active_reg == highest_active_reg)
04326 {
04327 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04328 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04329 }
04330 else
04331 {
04332
04333
04334
04335 unsigned char r = *p - 1;
04336 while (r > 0 && !IS_ACTIVE (reg_info[r]))
04337 r--;
04338
04339
04340
04341
04342
04343
04344
04345
04346 if (r == 0)
04347 {
04348 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04349 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04350 }
04351 else
04352 highest_active_reg = r;
04353 }
04354
04355
04356
04357
04358
04359
04360 if ((!MATCHED_SOMETHING (reg_info[*p])
04361 || just_past_start_mem == p - 1)
04362 && (p + 2) < pend)
04363 {
04364 boolean is_a_jump_n = false;
04365
04366 p1 = p + 2;
04367 mcnt = 0;
04368 switch ((re_opcode_t) *p1++)
04369 {
04370 case jump_n:
04371 is_a_jump_n = true;
04372 case pop_failure_jump:
04373 case maybe_pop_jump:
04374 case jump:
04375 case dummy_failure_jump:
04376 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04377 if (is_a_jump_n)
04378 p1 += 2;
04379 break;
04380
04381 default:
04382 ;
04383 }
04384 p1 += mcnt;
04385
04386
04387
04388
04389
04390
04391 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
04392 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
04393 {
04394
04395
04396
04397
04398
04399
04400
04401
04402
04403
04404 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
04405 {
04406 unsigned r;
04407
04408 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
04409
04410
04411 for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
04412 r++)
04413 {
04414 regstart[r] = old_regstart[r];
04415
04416
04417 if (old_regend[r] >= regstart[r])
04418 regend[r] = old_regend[r];
04419 }
04420 }
04421 p1++;
04422 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04423 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
04424
04425 goto fail;
04426 }
04427 }
04428
04429
04430 p += 2;
04431 break;
04432
04433
04434
04435
04436 case duplicate:
04437 {
04438 register const char *d2, *dend2;
04439 int regno = *p++;
04440 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
04441
04442
04443 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
04444 goto fail;
04445
04446
04447 d2 = regstart[regno];
04448
04449
04450
04451
04452
04453
04454 dend2 = ((FIRST_STRING_P (regstart[regno])
04455 == FIRST_STRING_P (regend[regno]))
04456 ? regend[regno] : end_match_1);
04457 for (;;)
04458 {
04459
04460
04461 while (d2 == dend2)
04462 {
04463 if (dend2 == end_match_2) break;
04464 if (dend2 == regend[regno]) break;
04465
04466
04467 d2 = string2;
04468 dend2 = regend[regno];
04469 }
04470
04471 if (d2 == dend2) break;
04472
04473
04474 PREFETCH ();
04475
04476
04477 mcnt = dend - d;
04478
04479
04480
04481 if (mcnt > dend2 - d2)
04482 mcnt = dend2 - d2;
04483
04484
04485
04486 if (translate
04487 ? bcmp_translate (d, d2, mcnt, translate)
04488 : bcmp (d, d2, mcnt))
04489 goto fail;
04490 d += mcnt, d2 += mcnt;
04491
04492
04493 SET_REGS_MATCHED ();
04494 }
04495 }
04496 break;
04497
04498
04499
04500
04501
04502 case begline:
04503 DEBUG_PRINT1 ("EXECUTING begline.\n");
04504
04505 if (AT_STRINGS_BEG (d))
04506 {
04507 if (!bufp->not_bol) break;
04508 }
04509 else if (d[-1] == '\n' && bufp->newline_anchor)
04510 {
04511 break;
04512 }
04513
04514 goto fail;
04515
04516
04517
04518 case endline:
04519 DEBUG_PRINT1 ("EXECUTING endline.\n");
04520
04521 if (AT_STRINGS_END (d))
04522 {
04523 if (!bufp->not_eol) break;
04524 }
04525
04526
04527 else if ((d == end1 ? *string2 : *d) == '\n'
04528 && bufp->newline_anchor)
04529 {
04530 break;
04531 }
04532 goto fail;
04533
04534
04535
04536 case begbuf:
04537 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04538 if (AT_STRINGS_BEG (d))
04539 break;
04540 goto fail;
04541
04542
04543
04544 case endbuf:
04545 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04546 if (AT_STRINGS_END (d))
04547 break;
04548 goto fail;
04549
04550
04551
04552
04553
04554
04555
04556
04557
04558
04559
04560
04561
04562
04563
04564
04565
04566
04567 case on_failure_keep_string_jump:
04568 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04569
04570 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04571 #ifdef _LIBC
04572 DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
04573 #else
04574 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04575 #endif
04576
04577 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04578 break;
04579
04580
04581
04582
04583
04584
04585
04586
04587
04588
04589
04590
04591
04592
04593 case on_failure_jump:
04594 on_failure:
04595 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04596
04597 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04598 #ifdef _LIBC
04599 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
04600 #else
04601 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04602 #endif
04603
04604
04605
04606
04607
04608
04609
04610
04611
04612
04613 p1 = p;
04614
04615
04616
04617
04618
04619 while (p1 < pend && (re_opcode_t) *p1 == no_op)
04620 p1++;
04621
04622 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04623 {
04624
04625
04626
04627
04628 highest_active_reg = *(p1 + 1) + *(p1 + 2);
04629 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04630 lowest_active_reg = *(p1 + 1);
04631 }
04632
04633 DEBUG_PRINT1 (":\n");
04634 PUSH_FAILURE_POINT (p + mcnt, d, -2);
04635 break;
04636
04637
04638
04639
04640 case maybe_pop_jump:
04641 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04642 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04643 {
04644 register unsigned char *p2 = p;
04645
04646
04647
04648
04649
04650
04651
04652
04653
04654
04655
04656
04657
04658
04659
04660
04661
04662
04663 while (1)
04664 {
04665 if (p2 + 2 < pend
04666 && ((re_opcode_t) *p2 == stop_memory
04667 || (re_opcode_t) *p2 == start_memory))
04668 p2 += 3;
04669 else if (p2 + 6 < pend
04670 && (re_opcode_t) *p2 == dummy_failure_jump)
04671 p2 += 6;
04672 else
04673 break;
04674 }
04675
04676 p1 = p + mcnt;
04677
04678
04679
04680
04681
04682 if (p2 == pend)
04683 {
04684
04685
04686
04687 p[-3] = (unsigned char) pop_failure_jump;
04688 DEBUG_PRINT1
04689 (" End of pattern: change to `pop_failure_jump'.\n");
04690 }
04691
04692 else if ((re_opcode_t) *p2 == exactn
04693 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04694 {
04695 register unsigned char c
04696 = *p2 == (unsigned char) endline ? '\n' : p2[2];
04697
04698 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04699 {
04700 p[-3] = (unsigned char) pop_failure_jump;
04701 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
04702 c, p1[5]);
04703 }
04704
04705 else if ((re_opcode_t) p1[3] == charset
04706 || (re_opcode_t) p1[3] == charset_not)
04707 {
04708 int not = (re_opcode_t) p1[3] == charset_not;
04709
04710 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04711 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04712 not = !not;
04713
04714
04715
04716 if (!not)
04717 {
04718 p[-3] = (unsigned char) pop_failure_jump;
04719 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04720 }
04721 }
04722 }
04723 else if ((re_opcode_t) *p2 == charset)
04724 {
04725 #ifdef DEBUG
04726 register unsigned char c
04727 = *p2 == (unsigned char) endline ? '\n' : p2[2];
04728 #endif
04729
04730 #if 0
04731 if ((re_opcode_t) p1[3] == exactn
04732 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
04733 && (p2[2 + p1[5] / BYTEWIDTH]
04734 & (1 << (p1[5] % BYTEWIDTH)))))
04735 #else
04736 if ((re_opcode_t) p1[3] == exactn
04737 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
04738 && (p2[2 + p1[4] / BYTEWIDTH]
04739 & (1 << (p1[4] % BYTEWIDTH)))))
04740 #endif
04741 {
04742 p[-3] = (unsigned char) pop_failure_jump;
04743 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
04744 c, p1[5]);
04745 }
04746
04747 else if ((re_opcode_t) p1[3] == charset_not)
04748 {
04749 int idx;
04750
04751
04752 for (idx = 0; idx < (int) p2[1]; idx++)
04753 if (! (p2[2 + idx] == 0
04754 || (idx < (int) p1[4]
04755 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
04756 break;
04757
04758 if (idx == p2[1])
04759 {
04760 p[-3] = (unsigned char) pop_failure_jump;
04761 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04762 }
04763 }
04764 else if ((re_opcode_t) p1[3] == charset)
04765 {
04766 int idx;
04767
04768
04769 for (idx = 0;
04770 idx < (int) p2[1] && idx < (int) p1[4];
04771 idx++)
04772 if ((p2[2 + idx] & p1[5 + idx]) != 0)
04773 break;
04774
04775 if (idx == p2[1] || idx == p1[4])
04776 {
04777 p[-3] = (unsigned char) pop_failure_jump;
04778 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04779 }
04780 }
04781 }
04782 }
04783 p -= 2;
04784 if ((re_opcode_t) p[-1] != pop_failure_jump)
04785 {
04786 p[-1] = (unsigned char) jump;
04787 DEBUG_PRINT1 (" Match => jump.\n");
04788 goto unconditional_jump;
04789 }
04790
04791
04792
04793
04794
04795
04796
04797
04798
04799 case pop_failure_jump:
04800 {
04801
04802
04803
04804
04805
04806 active_reg_t dummy_low_reg, dummy_high_reg;
04807 unsigned char *pdummy;
04808 const char *sdummy;
04809
04810 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04811 POP_FAILURE_POINT (sdummy, pdummy,
04812 dummy_low_reg, dummy_high_reg,
04813 reg_dummy, reg_dummy, reg_info_dummy);
04814 }
04815
04816
04817 unconditional_jump:
04818 #ifdef _LIBC
04819 DEBUG_PRINT2 ("\n%p: ", p);
04820 #else
04821 DEBUG_PRINT2 ("\n0x%x: ", p);
04822 #endif
04823
04824
04825
04826 case jump:
04827 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04828 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04829 p += mcnt;
04830 #ifdef _LIBC
04831 DEBUG_PRINT2 ("(to %p).\n", p);
04832 #else
04833 DEBUG_PRINT2 ("(to 0x%x).\n", p);
04834 #endif
04835 break;
04836
04837
04838
04839
04840 case jump_past_alt:
04841 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04842 goto unconditional_jump;
04843
04844
04845
04846
04847
04848
04849
04850 case dummy_failure_jump:
04851 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04852
04853
04854 PUSH_FAILURE_POINT (0, 0, -2);
04855 goto unconditional_jump;
04856
04857
04858
04859
04860
04861
04862
04863 case push_dummy_failure:
04864 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04865
04866
04867 PUSH_FAILURE_POINT (0, 0, -2);
04868 break;
04869
04870
04871
04872 case succeed_n:
04873 EXTRACT_NUMBER (mcnt, p + 2);
04874 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04875
04876 assert (mcnt >= 0);
04877
04878 if (mcnt > 0)
04879 {
04880 mcnt--;
04881 p += 2;
04882 STORE_NUMBER_AND_INCR (p, mcnt);
04883 #ifdef _LIBC
04884 DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt);
04885 #else
04886 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt);
04887 #endif
04888 }
04889 else if (mcnt == 0)
04890 {
04891 #ifdef _LIBC
04892 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
04893 #else
04894 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
04895 #endif
04896 p[2] = (unsigned char) no_op;
04897 p[3] = (unsigned char) no_op;
04898 goto on_failure;
04899 }
04900 break;
04901
04902 case jump_n:
04903 EXTRACT_NUMBER (mcnt, p + 2);
04904 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04905
04906
04907 if (mcnt)
04908 {
04909 mcnt--;
04910 STORE_NUMBER (p + 2, mcnt);
04911 #ifdef _LIBC
04912 DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt);
04913 #else
04914 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt);
04915 #endif
04916 goto unconditional_jump;
04917 }
04918
04919 else
04920 p += 4;
04921 break;
04922
04923 case set_number_at:
04924 {
04925 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04926
04927 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04928 p1 = p + mcnt;
04929 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04930 #ifdef _LIBC
04931 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
04932 #else
04933 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
04934 #endif
04935 STORE_NUMBER (p1, mcnt);
04936 break;
04937 }
04938
04939 #if 0
04940
04941
04942
04943
04944
04945 case wordbound:
04946 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04947 if (AT_WORD_BOUNDARY (d))
04948 break;
04949 goto fail;
04950
04951 case notwordbound:
04952 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04953 if (AT_WORD_BOUNDARY (d))
04954 goto fail;
04955 break;
04956 #else
04957 case wordbound:
04958 {
04959 boolean prevchar, thischar;
04960
04961 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04962 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04963 break;
04964
04965 prevchar = WORDCHAR_P (d - 1);
04966 thischar = WORDCHAR_P (d);
04967 if (prevchar != thischar)
04968 break;
04969 goto fail;
04970 }
04971
04972 case notwordbound:
04973 {
04974 boolean prevchar, thischar;
04975
04976 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04977 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04978 goto fail;
04979
04980 prevchar = WORDCHAR_P (d - 1);
04981 thischar = WORDCHAR_P (d);
04982 if (prevchar != thischar)
04983 goto fail;
04984 break;
04985 }
04986 #endif
04987
04988 case wordbeg:
04989 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04990 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04991 break;
04992 goto fail;
04993
04994 case wordend:
04995 DEBUG_PRINT1 ("EXECUTING wordend.\n");
04996 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04997 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04998 break;
04999 goto fail;
05000
05001 #ifdef emacs
05002 case before_dot:
05003 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
05004 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
05005 goto fail;
05006 break;
05007
05008 case at_dot:
05009 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
05010 if (PTR_CHAR_POS ((unsigned char *) d) != point)
05011 goto fail;
05012 break;
05013
05014 case after_dot:
05015 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
05016 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
05017 goto fail;
05018 break;
05019
05020 case syntaxspec:
05021 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
05022 mcnt = *p++;
05023 goto matchsyntax;
05024
05025 case wordchar:
05026 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
05027 mcnt = (int) Sword;
05028 matchsyntax:
05029 PREFETCH ();
05030
05031 d++;
05032 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
05033 goto fail;
05034 SET_REGS_MATCHED ();
05035 break;
05036
05037 case notsyntaxspec:
05038 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
05039 mcnt = *p++;
05040 goto matchnotsyntax;
05041
05042 case notwordchar:
05043 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
05044 mcnt = (int) Sword;
05045 matchnotsyntax:
05046 PREFETCH ();
05047
05048 d++;
05049 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
05050 goto fail;
05051 SET_REGS_MATCHED ();
05052 break;
05053
05054 #else
05055 case wordchar:
05056 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
05057 PREFETCH ();
05058 if (!WORDCHAR_P (d))
05059 goto fail;
05060 SET_REGS_MATCHED ();
05061 d++;
05062 break;
05063
05064 case notwordchar:
05065 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
05066 PREFETCH ();
05067 if (WORDCHAR_P (d))
05068 goto fail;
05069 SET_REGS_MATCHED ();
05070 d++;
05071 break;
05072 #endif
05073
05074 default:
05075 abort ();
05076 }
05077 continue;
05078
05079
05080
05081 fail:
05082 if (!FAIL_STACK_EMPTY ())
05083 {
05084 DEBUG_PRINT1 ("\nFAIL:\n");
05085 POP_FAILURE_POINT (d, p,
05086 lowest_active_reg, highest_active_reg,
05087 regstart, regend, reg_info);
05088
05089
05090 if (!p)
05091 goto fail;
05092
05093
05094 assert (p <= pend);
05095 if (p < pend)
05096 {
05097 boolean is_a_jump_n = false;
05098
05099
05100
05101 switch ((re_opcode_t) *p)
05102 {
05103 case jump_n:
05104 is_a_jump_n = true;
05105 case maybe_pop_jump:
05106 case pop_failure_jump:
05107 case jump:
05108 p1 = p + 1;
05109 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05110 p1 += mcnt;
05111
05112 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
05113 || (!is_a_jump_n
05114 && (re_opcode_t) *p1 == on_failure_jump))
05115 goto fail;
05116 break;
05117 default:
05118 ;
05119 }
05120 }
05121
05122 if (d >= string1 && d <= end1)
05123 dend = end_match_1;
05124 }
05125 else
05126 break;
05127 }
05128
05129 if (best_regs_set)
05130 goto restore_best_regs;
05131
05132 FREE_VARIABLES ();
05133
05134 return -1;
05135 }
05136
05137
05138
05139
05140
05141
05142
05143
05144
05145
05146
05147
05148
05149
05150 static boolean
05151 group_match_null_string_p(unsigned char **p,
05152 unsigned char *end,
05153 register_info_type *reg_info)
05154 {
05155 int mcnt;
05156
05157 unsigned char *p1 = *p + 2;
05158
05159 while (p1 < end)
05160 {
05161
05162
05163
05164
05165 switch ((re_opcode_t) *p1)
05166 {
05167
05168 case on_failure_jump:
05169 p1++;
05170 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05171
05172
05173
05174
05175 if (mcnt >= 0)
05176 {
05177
05178
05179
05180
05181
05182
05183
05184
05185
05186
05187
05188
05189
05190
05191
05192
05193
05194
05195 while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
05196 {
05197
05198
05199
05200
05201 if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
05202 reg_info))
05203 return false;
05204
05205
05206
05207 p1 += mcnt;
05208
05209
05210
05211 if ((re_opcode_t) *p1 != on_failure_jump)
05212 break;
05213
05214
05215
05216 p1++;
05217 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05218 if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
05219 {
05220
05221 p1 -= 3;
05222 break;
05223 }
05224 }
05225
05226
05227
05228
05229 EXTRACT_NUMBER (mcnt, p1 - 2);
05230
05231 if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
05232 return false;
05233
05234 p1 += mcnt;
05235 }
05236 break;
05237
05238
05239 case stop_memory:
05240 assert (p1[1] == **p);
05241 *p = p1 + 2;
05242 return true;
05243
05244
05245 default:
05246 if (!common_op_match_null_string_p (&p1, end, reg_info))
05247 return false;
05248 }
05249 }
05250
05251 return false;
05252 }
05253
05254
05255
05256
05257
05258
05259 static boolean
05260 alt_match_null_string_p(unsigned char *p,
05261 unsigned char *end,
05262 register_info_type *reg_info)
05263 {
05264 int mcnt;
05265 unsigned char *p1 = p;
05266
05267 while (p1 < end)
05268 {
05269
05270
05271
05272 switch ((re_opcode_t) *p1)
05273 {
05274
05275 case on_failure_jump:
05276 p1++;
05277 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05278 p1 += mcnt;
05279 break;
05280
05281 default:
05282 if (!common_op_match_null_string_p (&p1, end, reg_info))
05283 return false;
05284 }
05285 }
05286
05287 return true;
05288 }
05289
05290
05291
05292
05293
05294
05295
05296 static boolean
05297 common_op_match_null_string_p(unsigned char **p,
05298 unsigned char *end,
05299 register_info_type *reg_info)
05300 {
05301 int mcnt;
05302 boolean ret;
05303 int reg_no;
05304 unsigned char *p1 = *p;
05305
05306 switch ((re_opcode_t) *p1++)
05307 {
05308 case no_op:
05309 case begline:
05310 case endline:
05311 case begbuf:
05312 case endbuf:
05313 case wordbeg:
05314 case wordend:
05315 case wordbound:
05316 case notwordbound:
05317 #ifdef emacs
05318 case before_dot:
05319 case at_dot:
05320 case after_dot:
05321 #endif
05322 break;
05323
05324 case start_memory:
05325 reg_no = *p1;
05326 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
05327 ret = group_match_null_string_p (&p1, end, reg_info);
05328
05329
05330
05331
05332 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
05333 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
05334
05335 if (!ret)
05336 return false;
05337 break;
05338
05339
05340 case jump:
05341 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05342 if (mcnt >= 0)
05343 p1 += mcnt;
05344 else
05345 return false;
05346 break;
05347
05348 case succeed_n:
05349
05350 p1 += 2;
05351 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05352
05353 if (mcnt == 0)
05354 {
05355 p1 -= 4;
05356 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05357 p1 += mcnt;
05358 }
05359 else
05360 return false;
05361 break;
05362
05363 case duplicate:
05364 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
05365 return false;
05366 break;
05367
05368 case set_number_at:
05369 p1 += 4;
05370
05371 default:
05372
05373 return false;
05374 }
05375
05376 *p = p1;
05377 return true;
05378 }
05379
05380
05381
05382
05383
05384 static int
05385 bcmp_translate(const char *s1,
05386 const char *s2,
05387 register int len,
05388 RE_TRANSLATE_TYPE translate)
05389 {
05390 register const unsigned char *p1 = (const unsigned char *) s1;
05391 register const unsigned char *p2 = (const unsigned char *) s2;
05392 while (len)
05393 {
05394 if (translate[*p1++] != translate[*p2++]) return 1;
05395 len--;
05396 }
05397 return 0;
05398 }
05399
05400
05401
05402
05403
05404
05405
05406
05407
05408
05409
05410
05411 const char *
05412 re_compile_pattern(const char *pattern,
05413 size_t length,
05414 struct re_pattern_buffer *bufp)
05415 {
05416 reg_errcode_t ret;
05417
05418
05419
05420 bufp->regs_allocated = REGS_UNALLOCATED;
05421
05422
05423
05424
05425 bufp->no_sub = 0;
05426
05427
05428 bufp->newline_anchor = 1;
05429
05430 ret = regex_compile (pattern, length, re_syntax_options, bufp);
05431
05432 if (!ret)
05433 return NULL;
05434 return gettext (re_error_msgid[(int) ret]);
05435 }
05436
05437
05438
05439
05440 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
05441
05442
05443 static struct re_pattern_buffer re_comp_buf;
05444
05445 char *
05446 #ifdef _LIBC
05447
05448
05449
05450 weak_function
05451 #endif
05452 re_comp (s)
05453 const char *s;
05454 {
05455 reg_errcode_t ret;
05456
05457 if (!s)
05458 {
05459 if (!re_comp_buf.buffer)
05460 return gettext ("No previous regular expression");
05461 return 0;
05462 }
05463
05464 if (!re_comp_buf.buffer)
05465 {
05466 re_comp_buf.buffer = (unsigned char *) malloc (200);
05467 if (re_comp_buf.buffer == NULL)
05468 return gettext (re_error_msgid[(int) REG_ESPACE]);
05469 re_comp_buf.allocated = 200;
05470
05471 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
05472 if (re_comp_buf.fastmap == NULL)
05473 return gettext (re_error_msgid[(int) REG_ESPACE]);
05474 }
05475
05476
05477
05478
05479
05480 re_comp_buf.newline_anchor = 1;
05481
05482 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
05483
05484 if (!ret)
05485 return NULL;
05486
05487
05488 return (char *) gettext (re_error_msgid[(int) ret]);
05489 }
05490
05491
05492 int
05493 #ifdef _LIBC
05494 weak_function
05495 #endif
05496 re_exec (s)
05497 const char *s;
05498 {
05499 const int len = strlen (s);
05500 return
05501 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
05502 }
05503
05504 #endif
05505
05506
05507
05508 #ifndef emacs
05509
05510
05511
05512
05513
05514
05515
05516
05517
05518
05519
05520
05521
05522
05523
05524
05525
05526
05527
05528
05529
05530
05531
05532
05533
05534
05535
05536
05537
05538
05539
05540
05541
05542
05543
05544 int
05545 regcomp(regex_t *preg,
05546 const char *pattern,
05547 int cflags)
05548 {
05549 reg_errcode_t ret;
05550 reg_syntax_t syntax
05551 = (cflags & REG_EXTENDED) ?
05552 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
05553
05554 #ifdef DEBUG
05555 debug=0;
05556 DEBUG_PRINT1("EXECUTING regcomp");
05557 debug=0;
05558 #endif
05559
05560 preg->buffer = 0;
05561 preg->allocated = 0;
05562 preg->used = 0;
05563
05564
05565
05566
05567
05568 preg->fastmap = 0;
05569
05570 if (cflags & REG_ICASE)
05571 {
05572 unsigned i;
05573
05574 preg->translate
05575 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
05576 * sizeof (*(RE_TRANSLATE_TYPE)0));
05577 if (preg->translate == NULL)
05578 return (int) REG_ESPACE;
05579
05580
05581 for (i = 0; i < CHAR_SET_SIZE; i++)
05582 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
05583 }
05584 else
05585 preg->translate = NULL;
05586
05587
05588 if (cflags & REG_NEWLINE)
05589 {
05590 syntax &= ~RE_DOT_NEWLINE;
05591 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
05592
05593 preg->newline_anchor = 1;
05594 }
05595 else
05596 preg->newline_anchor = 0;
05597
05598 preg->no_sub = !!(cflags & REG_NOSUB);
05599
05600
05601
05602 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
05603
05604
05605
05606 if (ret == REG_ERPAREN) ret = REG_EPAREN;
05607
05608
05609
05610 return (int) ret;
05611 }
05612
05613
05614
05615
05616
05617
05618
05619
05620
05621
05622
05623
05624
05625
05626
05627
05628 int
05629 regexec(const regex_t *preg,
05630 const char *string,
05631 size_t nmatch,
05632 regmatch_t pmatch[],
05633 int eflags)
05634 {
05635 int ret;
05636 struct re_registers regs;
05637 regex_t private_preg;
05638 int len = strlen (string);
05639 boolean want_reg_info = !preg->no_sub && nmatch > 0;
05640
05641 private_preg = *preg;
05642
05643 private_preg.not_bol = !!(eflags & REG_NOTBOL);
05644 private_preg.not_eol = !!(eflags & REG_NOTEOL);
05645
05646
05647
05648
05649 private_preg.regs_allocated = REGS_FIXED;
05650
05651 if (want_reg_info)
05652 {
05653 regs.num_regs = nmatch;
05654 regs.start = TALLOC (nmatch, regoff_t);
05655 regs.end = TALLOC (nmatch, regoff_t);
05656 if (regs.start == NULL || regs.end == NULL)
05657 return (int) REG_NOMATCH;
05658 }
05659
05660
05661 ret = re_search (&private_preg, string, len,
05662 0, len,
05663 want_reg_info ? ®s : (struct re_registers *) 0);
05664
05665
05666 if (want_reg_info)
05667 {
05668 if (ret >= 0)
05669 {
05670 unsigned r;
05671
05672 for (r = 0; r < nmatch; r++)
05673 {
05674 pmatch[r].rm_so = regs.start[r];
05675 pmatch[r].rm_eo = regs.end[r];
05676 }
05677 }
05678
05679
05680 free (regs.start);
05681 free (regs.end);
05682 }
05683
05684
05685 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05686 }
05687
05688
05689
05690
05691
05692 size_t
05693 regerror(int errcode,
05694 const regex_t *preg,
05695 char *errbuf,
05696 size_t errbuf_size)
05697 {
05698 const char *msg;
05699 size_t msg_size;
05700
05701 if (errcode < 0
05702 || errcode >= (int) (sizeof (re_error_msgid)
05703 / sizeof (re_error_msgid[0])))
05704
05705
05706
05707
05708 abort ();
05709
05710 msg = gettext (re_error_msgid[errcode]);
05711
05712 msg_size = strlen (msg) + 1;
05713
05714 if (errbuf_size != 0)
05715 {
05716 if (msg_size > errbuf_size)
05717 {
05718 strncpy (errbuf, msg, errbuf_size - 1);
05719 errbuf[errbuf_size - 1] = 0;
05720 }
05721 else
05722 strcpy (errbuf, msg);
05723 }
05724
05725 return msg_size;
05726 }
05727
05728
05729
05730
05731 void
05732 regfree(regex_t *preg)
05733 {
05734 if (preg->buffer != NULL)
05735 free (preg->buffer);
05736 preg->buffer = NULL;
05737
05738 preg->allocated = 0;
05739 preg->used = 0;
05740
05741 if (preg->fastmap != NULL)
05742 free (preg->fastmap);
05743 preg->fastmap = NULL;
05744 preg->fastmap_accurate = 0;
05745
05746 if (preg->translate != NULL)
05747 free (preg->translate);
05748 preg->translate = NULL;
05749 }
05750
05751 #endif