/misc/src/release/graphviz-2.18-1/src/graphviz-2.18/lib/gvc/regex_win32.c

Go to the documentation of this file.
00001 /* #define DEBUG */
00002 /* LGPLd GNU regex for Native WIN32 */
00003 
00004 /* Part of the ht://Dig package   <http://www.htdig.org/> */
00005 /* Copyright (c) 2003 The ht://Dig Group */
00006 /* For copyright details, see the file COPYING in your distribution */
00007 /* or the GNU Library General Public License (LGPL) version 2 or later or later */
00008 /* <http://www.gnu.org/copyleft/lgpl.html> */
00009 
00010 /* Added June 2003 Neal Richter, RightNow Technologies */
00011 
00012 /* note that this version is significantly different from the original  */
00013 /* version 0.12 GNU source code.  It compiles and works on Native WIN32. */
00014 
00015 /* Extended regular expression matching and search library,
00016    version 0.12.
00017    (Implements POSIX draft P1003.2/D11.2, except for some of the
00018    internationalization features.)
00019 
00020    Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
00021 
00022    This file is part of the GNU C Library.  Its master source is NOT part of
00023    the C library, however.  The master source lives in /gd/gnu/lib.
00024 
00025    The GNU C Library is free software; you can redistribute it and/or
00026    modify it under the terms of the GNU Library General Public License as
00027    published by the Free Software Foundation; either version 2 of the
00028    License, or (at your option) any later version.
00029 
00030    The GNU C Library is distributed in the hope that it will be useful,
00031    but WITHOUT ANY WARRANTY; without even the implied warranty of
00032    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00033    Library General Public License for more details.
00034 
00035    You should have received a copy of the GNU Library General Public
00036    License along with the GNU C Library; see the file COPYING.LIB.  If not,
00037    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00038    Boston, MA 02111-1307, USA.  */
00039 
00040 #if defined(_WIN32)
00041 #pragma warning(disable: 4018 4101)
00042 #endif
00043 
00044 /* AIX requires this to be the first thing in the file. */
00045 #if defined (_AIX) && !defined (REGEX_MALLOC)
00046 #pragma alloca
00047 #endif
00048 
00049 #undef  _GNU_SOURCE
00050 #define _GNU_SOURCE
00051 
00052 #if defined(LINUX)
00053 #define STDC_HEADERS
00054 #endif
00055 
00056 #if defined(STDC_HEADERS) && !defined(emacs)
00057 #include <stddef.h>
00058 #else
00059 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
00060 #include <sys/types.h>
00061 #endif
00062 
00063 /* For platform which support the ISO C amendement 1 functionality we
00064    support user defined character classes.  */
00065 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
00066 # include <wctype.h>
00067 # include <wchar.h>
00068 #endif
00069 
00070 /* This is for other GNU distributions with internationalized messages.  */
00071 #undef HAVE_LIBINTL_H
00072 #if HAVE_LIBINTL_H || defined (_LIBC)
00073 # include <libintl.h>
00074 #else
00075 # define gettext(msgid) (msgid)
00076 #endif
00077 
00078 #ifndef gettext_noop
00079 /* This define is so xgettext can find the internationalizable
00080    strings.  */
00081 #define gettext_noop(String) String
00082 #endif
00083 
00084 /* The `emacs' switch turns on certain matching commands
00085    that make sense only in Emacs. */
00086 #ifdef emacs
00087 
00088 #include "lisp.h"
00089 #include "buffer.h"
00090 #include "syntax.h"
00091 
00092 #else  /* not emacs */
00093 
00094 /* If we are not linking with Emacs proper,
00095    we can't use the relocating allocator
00096    even if config.h says that we can.  */
00097 #undef REL_ALLOC
00098 
00099 #if defined (STDC_HEADERS) || defined (_LIBC) || defined(_WIN32)
00100 #include <stdlib.h>
00101 #else
00102 char *malloc ();
00103 char *realloc ();
00104 void free();
00105 #endif
00106 
00107 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
00108    If nothing else has been done, use the method below.  */
00109 #ifdef INHIBIT_STRING_HEADER
00110 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
00111 #if !defined (bzero) && !defined (bcopy)
00112 #undef INHIBIT_STRING_HEADER
00113 #endif
00114 #endif
00115 #endif
00116 
00117 #include <string.h>
00118 
00119 /* This is the normal way of making sure we have a bcopy and a bzero.
00120    This is used in most programs--a few other programs avoid this
00121    by defining INHIBIT_STRING_HEADER.  */
00122 #ifndef INHIBIT_STRING_HEADER
00123 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) || defined (_WIN32)
00124 #ifndef bcmp
00125 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
00126 #endif
00127 #ifndef bcopy
00128 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
00129 #endif
00130 #ifndef bzero
00131 #define bzero(s, n)     memset ((s), 0, (n))
00132 #endif
00133 #else
00134 #include <strings.h>
00135 #endif
00136 #endif
00137 
00138 /* Define the syntax stuff for <, >, etc.  */
00139 
00140 /* This must be nonzero for the wordchar and notwordchar pattern
00141    commands in re_match_2.  */
00142 #ifndef Sword
00143 #define Sword 1
00144 #endif
00145 
00146 #ifdef SWITCH_ENUM_BUG
00147 #define SWITCH_ENUM_CAST(x) ((int)(x))
00148 #else
00149 #define SWITCH_ENUM_CAST(x) (x)
00150 #endif
00151 
00152 #ifdef SYNTAX_TABLE
00153 
00154 extern char *re_syntax_table;
00155 
00156 #else /* not SYNTAX_TABLE */
00157 
00158 /* How many characters in the character set.  */
00159 #define CHAR_SET_SIZE 256
00160 
00161 static char re_syntax_table[CHAR_SET_SIZE];
00162 
00163 static void
00164 init_syntax_once ()
00165 {
00166     register int c;
00167     static int done = 0;
00168 
00169     if (done)
00170         return;
00171 
00172     bzero (re_syntax_table, sizeof re_syntax_table);
00173 
00174     for (c = 'a'; c <= 'z'; c++)
00175         re_syntax_table[c] = Sword;
00176 
00177     for (c = 'A'; c <= 'Z'; c++)
00178         re_syntax_table[c] = Sword;
00179 
00180     for (c = '0'; c <= '9'; c++)
00181         re_syntax_table[c] = Sword;
00182 
00183     re_syntax_table['_'] = Sword;
00184 
00185     done = 1;
00186 }
00187 
00188 #endif /* not SYNTAX_TABLE */
00189 
00190 #define SYNTAX(c) re_syntax_table[c]
00191 
00192 #endif /* not emacs */
00193 
00194 /* Get the interface, including the syntax bits.  */
00195 /* #include "regex.h" */
00196 #include "regex_win32.h"
00197 
00198 /* isalpha etc. are used for the character classes.  */
00199 #include <ctype.h>
00200 
00201 /* Jim Meyering writes:
00202 
00203    "... Some ctype macros are valid only for character codes that
00204    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
00205    using /bin/cc or gcc but without giving an ansi option).  So, all
00206    ctype uses should be through macros like ISPRINT...  If
00207    STDC_HEADERS is defined, then autoconf has verified that the ctype
00208    macros don't need to be guarded with references to isascii. ...
00209    Defining isascii to 1 should let any compiler worth its salt
00210    eliminate the && through constant folding."  */
00211 
00212 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
00213 #define ISASCII(c) 1
00214 #else
00215 #define ISASCII(c) isascii(c)
00216 #endif
00217 
00218 #ifdef isblank
00219 #define ISBLANK(c) (ISASCII (c) && isblank (c))
00220 #else
00221 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00222 #endif
00223 #ifdef isgraph
00224 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00225 #else
00226 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00227 #endif
00228 
00229 #define ISPRINT(c) (ISASCII (c) && isprint (c))
00230 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00231 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
00232 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
00233 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00234 #define ISLOWER(c) (ISASCII (c) && islower (c))
00235 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00236 #define ISSPACE(c) (ISASCII (c) && isspace (c))
00237 #define ISUPPER(c) (ISASCII (c) && isupper (c))
00238 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00239 
00240 #ifndef NULL
00241 #define NULL (void *)0
00242 #endif
00243 
00244 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
00245    since ours (we hope) works properly with all combinations of
00246    machines, compilers, `char' and `unsigned char' argument types.
00247    (Per Bothner suggested the basic approach.)  */
00248 #undef SIGN_EXTEND_CHAR
00249 #if __STDC__
00250 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00251 #else  /* not __STDC__ */
00252 /* As in Harbison and Steele.  */
00253 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00254 #endif
00255 
00256 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
00257    use `alloca' instead of `malloc'.  This is because using malloc in
00258    re_search* or re_match* could cause memory leaks when C-g is used in
00259    Emacs; also, malloc is slower and causes storage fragmentation.  On
00260    the other hand, malloc is more portable, and easier to debug.
00261 
00262    Because we sometimes use alloca, some routines have to be macros,
00263    not functions -- `alloca'-allocated space disappears at the end of the
00264    function it is called in.  */
00265 
00266 #if defined(REGEX_MALLOC) || defined(_WIN32)
00267 
00268 #define REGEX_ALLOCATE malloc
00269 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00270 #define REGEX_FREE free
00271 #define REGEX_MALLOC
00272 
00273 #else /* not REGEX_MALLOC  */
00274 
00275 /* Emacs already defines alloca, sometimes.  */
00276 #ifndef alloca
00277 
00278 /* Make alloca work the best possible way.  */
00279 #ifdef __GNUC__
00280 #define alloca __builtin_alloca
00281 #else /* not __GNUC__ */
00282 #if HAVE_ALLOCA_H
00283 #include <alloca.h>
00284 #else /* not __GNUC__ or HAVE_ALLOCA_H */
00285 #if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
00286 #ifndef _AIX /* Already did AIX, up at the top.  */
00287 char *alloca ();
00288 #endif /* not _AIX */
00289 #endif
00290 #endif /* not HAVE_ALLOCA_H */
00291 #endif /* not __GNUC__ */
00292 
00293 #endif /* not alloca */
00294 
00295 #define REGEX_ALLOCATE alloca
00296 
00297 /* Assumes a `char *destination' variable.  */
00298 #define REGEX_REALLOCATE(source, osize, nsize)                          \
00299   (destination = (char *) alloca (nsize),                               \
00300    bcopy (source, destination, osize),                                  \
00301    destination)
00302 
00303 /* No need to do anything to free, after alloca.  */
00304 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
00305 
00306 #endif /* not REGEX_MALLOC */
00307 
00308 /* Define how to allocate the failure stack.  */
00309 
00310 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
00311 
00312 #define REGEX_ALLOCATE_STACK(size)                              \
00313   r_alloc (&failure_stack_ptr, (size))
00314 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
00315   r_re_alloc (&failure_stack_ptr, (nsize))
00316 #define REGEX_FREE_STACK(ptr)                                   \
00317   r_alloc_free (&failure_stack_ptr)
00318 
00319 #else /* not using relocating allocator */
00320 
00321 #ifdef REGEX_MALLOC
00322 
00323 #define REGEX_ALLOCATE_STACK malloc
00324 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
00325 #define REGEX_FREE_STACK free
00326 
00327 #else /* not REGEX_MALLOC */
00328 
00329 #define REGEX_ALLOCATE_STACK alloca
00330 
00331 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
00332    REGEX_REALLOCATE (source, osize, nsize)
00333 /* No need to explicitly free anything.  */
00334 #define REGEX_FREE_STACK(arg)
00335 
00336 #endif /* not REGEX_MALLOC */
00337 #endif /* not using relocating allocator */
00338 
00339 
00340 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
00341    `string1' or just past its end.  This works if PTR is NULL, which is
00342    a good thing.  */
00343 #define FIRST_STRING_P(ptr)                                     \
00344   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00345 
00346 /* (Re)Allocate N items of type T using malloc, or fail.  */
00347 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00348 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00349 #define RETALLOC_IF(addr, n, t) \
00350   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
00351 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00352 
00353 #define BYTEWIDTH 8 /* In bits.  */
00354 
00355 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00356 
00357 #undef MAX
00358 #undef MIN
00359 #define MAX(a, b) ((a) > (b) ? (a) : (b))
00360 #define MIN(a, b) ((a) < (b) ? (a) : (b))
00361 
00362 /* typedef char boolean; */
00363 #define false 0
00364 #define true 1
00365 
00366 static int
00367 re_match_2_internal(struct re_pattern_buffer *bufp,
00368                     const char *string1,
00369                     int size1,
00370                     const char *string2,
00371                     int size2,
00372                     int pos,
00373                     struct re_registers *regs,
00374                     int stop);
00375 
00376 /* These are the command codes that appear in compiled regular
00377    expressions.  Some opcodes are followed by argument bytes.  A
00378    command code can specify any interpretation whatsoever for its
00379    arguments.  Zero bytes may appear in the compiled regular expression.  */
00380 
00381 typedef enum
00382 {
00383     no_op = 0,
00384 
00385     /* Succeed right away--no more backtracking.  */
00386     succeed,
00387 
00388     /* Followed by one byte giving n, then by n literal bytes.  */
00389     exactn,
00390 
00391     /* Matches any (more or less) character.  */
00392     anychar,
00393 
00394     /* Matches any one char belonging to specified set.  First
00395        following byte is number of bitmap bytes.  Then come bytes
00396        for a bitmap saying which chars are in.  Bits in each byte
00397        are ordered low-bit-first.  A character is in the set if its
00398        bit is 1.  A character too large to have a bit in the map is
00399        automatically not in the set.  */
00400     charset,
00401 
00402     /* Same parameters as charset, but match any character that is
00403        not one of those specified.  */
00404     charset_not,
00405 
00406     /* Start remembering the text that is matched, for storing in a
00407        register.  Followed by one byte with the register number, in
00408        the range 0 to one less than the pattern buffer's re_nsub
00409        field.  Then followed by one byte with the number of groups
00410        inner to this one.  (This last has to be part of the
00411        start_memory only because we need it in the on_failure_jump
00412        of re_match_2.)  */
00413     start_memory,
00414 
00415     /* Stop remembering the text that is matched and store it in a
00416        memory register.  Followed by one byte with the register
00417        number, in the range 0 to one less than `re_nsub' in the
00418        pattern buffer, and one byte with the number of inner groups,
00419        just like `start_memory'.  (We need the number of inner
00420        groups here because we don't have any easy way of finding the
00421        corresponding start_memory when we're at a stop_memory.)  */
00422     stop_memory,
00423 
00424     /* Match a duplicate of something remembered. Followed by one
00425        byte containing the register number.  */
00426     duplicate,
00427 
00428     /* Fail unless at beginning of line.  */
00429     begline,
00430 
00431     /* Fail unless at end of line.  */
00432     endline,
00433 
00434     /* Succeeds if at beginning of buffer (if emacs) or at beginning
00435        of string to be matched (if not).  */
00436     begbuf,
00437 
00438     /* Analogously, for end of buffer/string.  */
00439     endbuf,
00440 
00441     /* Followed by two byte relative address to which to jump.  */
00442     jump,
00443 
00444     /* Same as jump, but marks the end of an alternative.  */
00445     jump_past_alt,
00446 
00447     /* Followed by two-byte relative address of place to resume at
00448        in case of failure.  */
00449     on_failure_jump,
00450 
00451     /* Like on_failure_jump, but pushes a placeholder instead of the
00452        current string position when executed.  */
00453     on_failure_keep_string_jump,
00454 
00455     /* Throw away latest failure point and then jump to following
00456        two-byte relative address.  */
00457     pop_failure_jump,
00458 
00459     /* Change to pop_failure_jump if know won't have to backtrack to
00460        match; otherwise change to jump.  This is used to jump
00461        back to the beginning of a repeat.  If what follows this jump
00462        clearly won't match what the repeat does, such that we can be
00463        sure that there is no use backtracking out of repetitions
00464        already matched, then we change it to a pop_failure_jump.
00465        Followed by two-byte address.  */
00466     maybe_pop_jump,
00467 
00468     /* Jump to following two-byte address, and push a dummy failure
00469        point. This failure point will be thrown away if an attempt
00470        is made to use it for a failure.  A `+' construct makes this
00471        before the first repeat.  Also used as an intermediary kind
00472        of jump when compiling an alternative.  */
00473     dummy_failure_jump,
00474 
00475     /* Push a dummy failure point and continue.  Used at the end of
00476        alternatives.  */
00477     push_dummy_failure,
00478 
00479     /* Followed by two-byte relative address and two-byte number n.
00480        After matching N times, jump to the address upon failure.  */
00481     succeed_n,
00482 
00483     /* Followed by two-byte relative address, and two-byte number n.
00484        Jump to the address N times, then fail.  */
00485     jump_n,
00486 
00487     /* Set the following two-byte relative address to the
00488        subsequent two-byte number.  The address *includes* the two
00489        bytes of number.  */
00490     set_number_at,
00491 
00492     wordchar,   /* Matches any word-constituent character.  */
00493     notwordchar,        /* Matches any char that is not a word-constituent.  */
00494 
00495     wordbeg,    /* Succeeds if at word beginning.  */
00496     wordend,    /* Succeeds if at word end.  */
00497 
00498     wordbound,  /* Succeeds if at a word boundary.  */
00499     notwordbound        /* Succeeds if not at a word boundary.  */
00500 
00501 #ifdef emacs
00502     ,before_dot,        /* Succeeds if before point.  */
00503     at_dot,     /* Succeeds if at point.  */
00504     after_dot,  /* Succeeds if after point.  */
00505 
00506     /* Matches any character whose syntax is specified.  Followed by
00507        a byte which contains a syntax code, e.g., Sword.  */
00508     syntaxspec,
00509 
00510     /* Matches any character whose syntax is not that specified.  */
00511     notsyntaxspec
00512 #endif /* emacs */
00513 } re_opcode_t;
00514 
00515 /* Common operations on the compiled pattern.  */
00516 
00517 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
00518 
00519 #define STORE_NUMBER(destination, number)                               \
00520   do {                                                                  \
00521     (destination)[0] = (number) & 0377;                                 \
00522     (destination)[1] = (number) >> 8;                                   \
00523   } while (0)
00524 
00525 /* Same as STORE_NUMBER, except increment DESTINATION to
00526    the byte after where the number is stored.  Therefore, DESTINATION
00527    must be an lvalue.  */
00528 
00529 #define STORE_NUMBER_AND_INCR(destination, number)                      \
00530   do {                                                                  \
00531     STORE_NUMBER (destination, number);                                 \
00532     (destination) += 2;                                                 \
00533   } while (0)
00534 
00535 /* Put into DESTINATION a number stored in two contiguous bytes starting
00536    at SOURCE.  */
00537 
00538 #define EXTRACT_NUMBER(destination, source)                             \
00539   do {                                                                  \
00540     (destination) = *(source) & 0377;                                   \
00541     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
00542   } while (0)
00543 
00544 #ifdef DEBUG
00545 static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
00546 static void
00547 extract_number (dest, source)
00548      int *dest;
00549      unsigned char *source;
00550 {
00551     int temp = SIGN_EXTEND_CHAR (*(source + 1));
00552     *dest = *source & 0377;
00553     *dest += temp << 8;
00554 }
00555 
00556 #ifndef EXTRACT_MACROS /* To debug the macros.  */
00557 #undef EXTRACT_NUMBER
00558 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
00559 #endif /* not EXTRACT_MACROS */
00560 
00561 #endif /* DEBUG */
00562 
00563 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
00564    SOURCE must be an lvalue.  */
00565 
00566 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
00567   do {                                                                  \
00568     EXTRACT_NUMBER (destination, source);                               \
00569     (source) += 2;                                                      \
00570   } while (0)
00571 
00572 #ifdef DEBUG
00573 static void extract_number_and_incr _RE_ARGS ((int *destination,
00574                                                unsigned char **source));
00575 static void
00576 extract_number_and_incr (destination, source)
00577      int *destination;
00578      unsigned char **source;
00579 {
00580     extract_number (destination, *source);
00581     *source += 2;
00582 }
00583 
00584 #ifndef EXTRACT_MACROS
00585 #undef EXTRACT_NUMBER_AND_INCR
00586 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
00587   extract_number_and_incr (&dest, &src)
00588 #endif /* not EXTRACT_MACROS */
00589 
00590 #endif /* DEBUG */
00591 
00592 /* If DEBUG is defined, Regex prints many voluminous messages about what
00593    it is doing (if the variable `debug' is nonzero).  If linked with the
00594    main program in `iregex.c', you can enter patterns and strings
00595    interactively.  And if linked with the main program in `main.c' and
00596    the other test files, you can run the already-written tests.  */
00597 
00598 #ifdef DEBUG
00599 
00600 /* We use standard I/O for debugging.  */
00601 #include <stdio.h>
00602 
00603 /* It is useful to test things that ``must'' be true when debugging.  */
00604 #include <assert.h>
00605 
00606 static int debug = 0;
00607 
00608 #define DEBUG_STATEMENT(e) e
00609 #define DEBUG_PRINT1(x) if (debug) printf (x)
00610 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00611 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00612 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00613 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
00614   if (debug) print_partial_compiled_pattern (s, e)
00615 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
00616   if (debug) print_double_string (w, s1, sz1, s2, sz2)
00617 
00618 
00619 /* Print the fastmap in human-readable form.  */
00620 
00621 void
00622 print_fastmap (fastmap)
00623      char *fastmap;
00624 {
00625     unsigned was_a_range = 0;
00626     unsigned i = 0;
00627 
00628     while (i < (1 << BYTEWIDTH))
00629         {
00630             if (fastmap[i++])
00631                 {
00632                     was_a_range = 0;
00633                     putchar (i - 1);
00634                     while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
00635                         {
00636                             was_a_range = 1;
00637                             i++;
00638                         }
00639                     if (was_a_range)
00640                         {
00641                             printf ("-");
00642                             putchar (i - 1);
00643                         }
00644                 }
00645         }
00646     putchar ('\n');
00647 }
00648 
00649 
00650 /* Print a compiled pattern string in human-readable form, starting at
00651    the START pointer into it and ending just before the pointer END.  */
00652 
00653 void
00654 print_partial_compiled_pattern (start, end)
00655      unsigned char *start;
00656      unsigned char *end;
00657 {
00658     int mcnt, mcnt2;
00659     unsigned char *p1;
00660     unsigned char *p = start;
00661     unsigned char *pend = end;
00662 
00663     if (start == NULL)
00664         {
00665             printf ("(null)\n");
00666             return;
00667         }
00668 
00669     /* Loop over pattern commands.  */
00670     while (p < pend)
00671         {
00672             printf ("%d:\t", p - start);
00673 
00674             switch ((re_opcode_t) *p++)
00675                 {
00676                 case no_op:
00677                     printf ("/no_op");
00678                     break;
00679 
00680                 case exactn:
00681                     mcnt = *p++;
00682                     printf ("/exactn/%d", mcnt);
00683                     do
00684                         {
00685                             putchar ('/');
00686                             putchar (*p++);
00687                         }
00688                     while (--mcnt);
00689                     break;
00690 
00691                 case start_memory:
00692                     mcnt = *p++;
00693                     printf ("/start_memory/%d/%d", mcnt, *p++);
00694                     break;
00695 
00696                 case stop_memory:
00697                     mcnt = *p++;
00698                     printf ("/stop_memory/%d/%d", mcnt, *p++);
00699                     break;
00700 
00701                 case duplicate:
00702                     printf ("/duplicate/%d", *p++);
00703                     break;
00704 
00705                 case anychar:
00706                     printf ("/anychar");
00707                     break;
00708 
00709                 case charset:
00710                 case charset_not:
00711                     {
00712                         register int c, last = -100;
00713                         register int in_range = 0;
00714 
00715                         printf ("/charset [%s",
00716                                 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00717 
00718                         assert (p + *p < pend);
00719 
00720                         for (c = 0; c < 256; c++)
00721                             if (c / 8 < *p
00722                                 && (p[1 + (c/8)] & (1 << (c % 8))))
00723                                 {
00724                                     /* Are we starting a range?  */
00725                                     if (last + 1 == c && ! in_range)
00726                                         {
00727                                             putchar ('-');
00728                                             in_range = 1;
00729                                         }
00730                                     /* Have we broken a range?  */
00731                                     else if (last + 1 != c && in_range)
00732                                         {
00733                                             putchar (last);
00734                                             in_range = 0;
00735                                         }
00736 
00737                                     if (! in_range)
00738                                         putchar (c);
00739 
00740                                     last = c;
00741                                 }
00742 
00743                         if (in_range)
00744                             putchar (last);
00745 
00746                         putchar (']');
00747 
00748                         p += 1 + *p;
00749                     }
00750                     break;
00751 
00752                 case begline:
00753                     printf ("/begline");
00754                     break;
00755 
00756                 case endline:
00757                     printf ("/endline");
00758                     break;
00759 
00760                 case on_failure_jump:
00761                     extract_number_and_incr (&mcnt, &p);
00762                     printf ("/on_failure_jump to %d", p + mcnt - start);
00763                     break;
00764 
00765                 case on_failure_keep_string_jump:
00766                     extract_number_and_incr (&mcnt, &p);
00767                     printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00768                     break;
00769 
00770                 case dummy_failure_jump:
00771                     extract_number_and_incr (&mcnt, &p);
00772                     printf ("/dummy_failure_jump to %d", p + mcnt - start);
00773                     break;
00774 
00775                 case push_dummy_failure:
00776                     printf ("/push_dummy_failure");
00777                     break;
00778 
00779                 case maybe_pop_jump:
00780                     extract_number_and_incr (&mcnt, &p);
00781                     printf ("/maybe_pop_jump to %d", p + mcnt - start);
00782                     break;
00783 
00784                 case pop_failure_jump:
00785                     extract_number_and_incr (&mcnt, &p);
00786                     printf ("/pop_failure_jump to %d", p + mcnt - start);
00787                     break;
00788 
00789                 case jump_past_alt:
00790                     extract_number_and_incr (&mcnt, &p);
00791                     printf ("/jump_past_alt to %d", p + mcnt - start);
00792                     break;
00793 
00794                 case jump:
00795                     extract_number_and_incr (&mcnt, &p);
00796                     printf ("/jump to %d", p + mcnt - start);
00797                     break;
00798 
00799                 case succeed_n:
00800                     extract_number_and_incr (&mcnt, &p);
00801                     p1 = p + mcnt;
00802                     extract_number_and_incr (&mcnt2, &p);
00803                     printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
00804                     break;
00805 
00806                 case jump_n:
00807                     extract_number_and_incr (&mcnt, &p);
00808                     p1 = p + mcnt;
00809                     extract_number_and_incr (&mcnt2, &p);
00810                     printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
00811                     break;
00812 
00813                 case set_number_at:
00814                     extract_number_and_incr (&mcnt, &p);
00815                     p1 = p + mcnt;
00816                     extract_number_and_incr (&mcnt2, &p);
00817                     printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
00818                     break;
00819 
00820                 case wordbound:
00821                     printf ("/wordbound");
00822                     break;
00823 
00824                 case notwordbound:
00825                     printf ("/notwordbound");
00826                     break;
00827 
00828                 case wordbeg:
00829                     printf ("/wordbeg");
00830                     break;
00831 
00832                 case wordend:
00833                     printf ("/wordend");
00834 
00835 #ifdef emacs
00836                 case before_dot:
00837                     printf ("/before_dot");
00838                     break;
00839 
00840                 case at_dot:
00841                     printf ("/at_dot");
00842                     break;
00843 
00844                 case after_dot:
00845                     printf ("/after_dot");
00846                     break;
00847 
00848                 case syntaxspec:
00849                     printf ("/syntaxspec");
00850                     mcnt = *p++;
00851                     printf ("/%d", mcnt);
00852                     break;
00853 
00854                 case notsyntaxspec:
00855                     printf ("/notsyntaxspec");
00856                     mcnt = *p++;
00857                     printf ("/%d", mcnt);
00858                     break;
00859 #endif /* emacs */
00860 
00861                 case wordchar:
00862                     printf ("/wordchar");
00863                     break;
00864 
00865                 case notwordchar:
00866                     printf ("/notwordchar");
00867                     break;
00868 
00869                 case begbuf:
00870                     printf ("/begbuf");
00871                     break;
00872 
00873                 case endbuf:
00874                     printf ("/endbuf");
00875                     break;
00876 
00877                 default:
00878                     printf ("?%d", *(p-1));
00879                 }
00880 
00881             putchar ('\n');
00882         }
00883 
00884     printf ("%d:\tend of pattern.\n", p - start);
00885 }
00886 
00887 
00888 void
00889 print_compiled_pattern (bufp)
00890      struct re_pattern_buffer *bufp;
00891 {
00892     unsigned char *buffer = bufp->buffer;
00893 
00894     print_partial_compiled_pattern (buffer, buffer + bufp->used);
00895     printf ("%ld bytes used/%ld bytes allocated.\n",
00896             bufp->used, bufp->allocated);
00897 
00898     if (bufp->fastmap_accurate && bufp->fastmap)
00899         {
00900             printf ("fastmap: ");
00901             print_fastmap (bufp->fastmap);
00902         }
00903 
00904     printf ("re_nsub: %d\t", bufp->re_nsub);
00905     printf ("regs_alloc: %d\t", bufp->regs_allocated);
00906     printf ("can_be_null: %d\t", bufp->can_be_null);
00907     printf ("newline_anchor: %d\n", bufp->newline_anchor);
00908     printf ("no_sub: %d\t", bufp->no_sub);
00909     printf ("not_bol: %d\t", bufp->not_bol);
00910     printf ("not_eol: %d\t", bufp->not_eol);
00911     printf ("syntax: %lx\n", bufp->syntax);
00912     /* Perhaps we should print the translate table?  */
00913 }
00914 
00915 
00916 void
00917 print_double_string (where, string1, size1, string2, size2)
00918      const char *where;
00919      const char *string1;
00920      const char *string2;
00921      int size1;
00922      int size2;
00923 {
00924     int this_char;
00925 
00926     if (where == NULL)
00927         printf ("(null)");
00928     else
00929         {
00930             if (FIRST_STRING_P (where))
00931                 {
00932                     for (this_char = where - string1; this_char < size1; this_char++)
00933                         putchar (string1[this_char]);
00934 
00935                     where = string2;
00936                 }
00937 
00938             for (this_char = where - string2; this_char < size2; this_char++)
00939                 putchar (string2[this_char]);
00940         }
00941 }
00942 
00943 void
00944 printchar (c)
00945      int c;
00946 {
00947     putc (c, stderr);
00948 }
00949 
00950 #else /* not DEBUG */
00951 
00952 #undef assert
00953 #define assert(e)
00954 
00955 #define DEBUG_STATEMENT(e)
00956 #define DEBUG_PRINT1(x)
00957 #define DEBUG_PRINT2(x1, x2)
00958 #define DEBUG_PRINT3(x1, x2, x3)
00959 #define DEBUG_PRINT4(x1, x2, x3, x4)
00960 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
00961 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00962 
00963 #endif /* not DEBUG */
00964 
00965 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
00966    also be assigned to arbitrarily: each pattern buffer stores its own
00967    syntax, so it can be changed between regex compilations.  */
00968 /* This has no initializer because initialized variables in Emacs
00969    become read-only after dumping.  */
00970 reg_syntax_t re_syntax_options;
00971 
00972 
00973 /* Specify the precise syntax of regexps for compilation.  This provides
00974    for compatibility for various utilities which historically have
00975    different, incompatible syntaxes.
00976 
00977    The argument SYNTAX is a bit mask comprised of the various bits
00978    defined in regex.h.  We return the old syntax.  */
00979 
00980 reg_syntax_t
00981 re_set_syntax(reg_syntax_t syntax)
00982 {
00983     reg_syntax_t ret = re_syntax_options;
00984 
00985     re_syntax_options = syntax;
00986 #ifdef DEBUG
00987     if (syntax & RE_DEBUG)
00988         debug = 1;
00989     else if (debug) /* was on but now is not */
00990         debug = 0;
00991 #endif /* DEBUG */
00992     return ret;
00993 }
00994 
00995 /* This table gives an error message for each of the error codes listed
00996    in regex.h.  Obviously the order here has to be same as there.
00997    POSIX doesn't require that we do anything for REG_NOERROR,
00998    but why not be nice?  */
00999 
01000 static const char *re_error_msgid[] =
01001 {
01002     gettext_noop ("Success"),   /* REG_NOERROR */
01003     gettext_noop ("No match"),  /* REG_NOMATCH */
01004     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
01005     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
01006     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
01007     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
01008     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
01009     gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
01010     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
01011     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
01012     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
01013     gettext_noop ("Invalid range end"), /* REG_ERANGE */
01014     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
01015     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
01016     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
01017     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
01018     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
01019 };
01020 
01021 /* Avoiding alloca during matching, to placate r_alloc.  */
01022 
01023 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
01024    searching and matching functions should not call alloca.  On some
01025    systems, alloca is implemented in terms of malloc, and if we're
01026    using the relocating allocator routines, then malloc could cause a
01027    relocation, which might (if the strings being searched are in the
01028    ralloc heap) shift the data out from underneath the regexp
01029    routines.
01030 
01031    Here's another reason to avoid allocation: Emacs
01032    processes input from X in a signal handler; processing X input may
01033    call malloc; if input arrives while a matching routine is calling
01034    malloc, then we're scrod.  But Emacs can't just block input while
01035    calling matching routines; then we don't notice interrupts when
01036    they come in.  So, Emacs blocks input around all regexp calls
01037    except the matching calls, which it leaves unprotected, in the
01038    faith that they will not malloc.  */
01039 
01040 /* Normally, this is fine.  */
01041 #define MATCH_MAY_ALLOCATE
01042 
01043 /* When using GNU C, we are not REALLY using the C alloca, no matter
01044    what config.h may say.  So don't take precautions for it.  */
01045 #ifdef __GNUC__
01046 #undef C_ALLOCA
01047 #endif
01048 
01049 /* The match routines may not allocate if (1) they would do it with malloc
01050    and (2) it's not safe for them to use malloc.
01051    Note that if REL_ALLOC is defined, matching would not use malloc for the
01052    failure stack, but we would still use it for the register vectors;
01053    so REL_ALLOC should not affect this.  */
01054 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
01055 #undef MATCH_MAY_ALLOCATE
01056 #endif
01057 
01058 
01059 /* Failure stack declarations and macros; both re_compile_fastmap and
01060    re_match_2 use a failure stack.  These have to be macros because of
01061    REGEX_ALLOCATE_STACK.  */
01062 
01063 
01064 /* Number of failure points for which to initially allocate space
01065    when matching.  If this number is exceeded, we allocate more
01066    space, so it is not a hard limit.  */
01067 #ifndef INIT_FAILURE_ALLOC
01068 #define INIT_FAILURE_ALLOC 5
01069 #endif
01070 
01071 /* Roughly the maximum number of failure points on the stack.  Would be
01072    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
01073    This is a variable only so users of regex can assign to it; we never
01074    change it ourselves.  */
01075 
01076 #ifdef INT_IS_16BIT
01077 
01078 #if defined (MATCH_MAY_ALLOCATE)
01079 /* 4400 was enough to cause a crash on Alpha OSF/1,
01080    whose default stack limit is 2mb.  */
01081 static long int re_max_failures = 4000;
01082 #else
01083 static long int re_max_failures = 2000;
01084 #endif
01085 
01086 union fail_stack_elt
01087 {
01088     unsigned char *pointer;
01089     long int integer;
01090 };
01091 
01092 typedef union fail_stack_elt fail_stack_elt_t;
01093 
01094 typedef struct
01095 {
01096     fail_stack_elt_t *stack;
01097     unsigned long int size;
01098     unsigned long int avail;            /* Offset of next open position.  */
01099 } fail_stack_type;
01100 
01101 #else /* not INT_IS_16BIT */
01102 
01103 #if defined (MATCH_MAY_ALLOCATE)
01104 /* 4400 was enough to cause a crash on Alpha OSF/1,
01105    whose default stack limit is 2mb.  */
01106 static int re_max_failures = 20000;
01107 #else
01108 static int re_max_failures = 2000;
01109 #endif
01110 
01111 union fail_stack_elt
01112 {
01113     unsigned char *pointer;
01114     int integer;
01115 };
01116 
01117 typedef union fail_stack_elt fail_stack_elt_t;
01118 
01119 typedef struct
01120 {
01121     fail_stack_elt_t *stack;
01122     unsigned size;
01123     unsigned avail;                     /* Offset of next open position.  */
01124 } fail_stack_type;
01125 
01126 #endif /* INT_IS_16BIT */
01127 
01128 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
01129 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
01130 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
01131 
01132 
01133 /* Define macros to initialize and free the failure stack.
01134    Do `return -2' if the alloc fails.  */
01135 
01136 #ifdef MATCH_MAY_ALLOCATE
01137 #define INIT_FAIL_STACK()                                               \
01138   do {                                                                  \
01139     fail_stack.stack = (fail_stack_elt_t *)                             \
01140       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));    \
01141                                                                         \
01142     if (fail_stack.stack == NULL)                                       \
01143       return -2;                                                        \
01144                                                                         \
01145     fail_stack.size = INIT_FAILURE_ALLOC;                               \
01146     fail_stack.avail = 0;                                               \
01147   } while (0)
01148 
01149 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
01150 #else
01151 #define INIT_FAIL_STACK()                                               \
01152   do {                                                                  \
01153     fail_stack.avail = 0;                                               \
01154   } while (0)
01155 
01156 #define RESET_FAIL_STACK()
01157 #endif
01158 
01159 
01160 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
01161 
01162    Return 1 if succeeds, and 0 if either ran out of memory
01163    allocating space for it or it was already too large.
01164 
01165    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
01166 
01167 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
01168   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
01169    ? 0                                                                  \
01170    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
01171         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
01172           (fail_stack).size * sizeof (fail_stack_elt_t),                \
01173           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
01174                                                                         \
01175       (fail_stack).stack == NULL                                        \
01176       ? 0                                                               \
01177       : ((fail_stack).size <<= 1,                                       \
01178          1)))
01179 
01180 
01181 /* Push pointer POINTER on FAIL_STACK.
01182    Return 1 if was able to do so and 0 if ran out of memory allocating
01183    space to do so.  */
01184 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
01185   ((FAIL_STACK_FULL ()                                                  \
01186     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
01187    ? 0                                                                  \
01188    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
01189       1))
01190 
01191 /* Push a pointer value onto the failure stack.
01192    Assumes the variable `fail_stack'.  Probably should only
01193    be called from within `PUSH_FAILURE_POINT'.  */
01194 #define PUSH_FAILURE_POINTER(item)                                      \
01195   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
01196 
01197 /* This pushes an integer-valued item onto the failure stack.
01198    Assumes the variable `fail_stack'.  Probably should only
01199    be called from within `PUSH_FAILURE_POINT'.  */
01200 #define PUSH_FAILURE_INT(item)                                  \
01201   fail_stack.stack[fail_stack.avail++].integer = (item)
01202 
01203 /* Push a fail_stack_elt_t value onto the failure stack.
01204    Assumes the variable `fail_stack'.  Probably should only
01205    be called from within `PUSH_FAILURE_POINT'.  */
01206 #define PUSH_FAILURE_ELT(item)                                  \
01207   fail_stack.stack[fail_stack.avail++] =  (item)
01208 
01209 /* These three POP... operations complement the three PUSH... operations.
01210    All assume that `fail_stack' is nonempty.  */
01211 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
01212 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
01213 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
01214 
01215 /* Used to omit pushing failure point id's when we're not debugging.  */
01216 #ifdef DEBUG
01217 #define DEBUG_PUSH PUSH_FAILURE_INT
01218 #define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
01219 #else
01220 #define DEBUG_PUSH(item)
01221 #define DEBUG_POP(item_addr)
01222 #endif
01223 
01224 
01225 /* Push the information about the state we will need
01226    if we ever fail back to it.
01227 
01228    Requires variables fail_stack, regstart, regend, reg_info, and
01229    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
01230    declared.
01231 
01232    Does `return FAILURE_CODE' if runs out of memory.  */
01233 
01234 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
01235   do {                                                                  \
01236     char *destination;                                                  \
01237     /* Must be int, so when we don't save any registers, the arithmetic \
01238        of 0 + -1 isn't done as unsigned.  */                            \
01239     /* Can't be int, since there is not a shred of a guarantee that int \
01240        is wide enough to hold a value of something to which pointer can \
01241        be assigned */                                                   \
01242     s_reg_t this_reg;                                                   \
01243                                                                         \
01244     DEBUG_STATEMENT (failure_id++);                                     \
01245     DEBUG_STATEMENT (nfailure_points_pushed++);                         \
01246     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
01247     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
01248     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
01249                                                                         \
01250     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
01251     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
01252                                                                         \
01253     /* Ensure we have enough space allocated for what we will push.  */ \
01254     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
01255       {                                                                 \
01256         if (!DOUBLE_FAIL_STACK (fail_stack))                            \
01257           return failure_code;                                          \
01258                                                                         \
01259         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
01260                        (fail_stack).size);                              \
01261         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
01262       }                                                                 \
01263                                                                         \
01264     /* Push the info, starting with the registers.  */                  \
01265     DEBUG_PRINT1 ("\n");                                                \
01266                                                                         \
01267     if (1)                                                              \
01268       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
01269            this_reg++)                                                  \
01270         {                                                               \
01271           DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);               \
01272           DEBUG_STATEMENT (num_regs_pushed++);                          \
01273                                                                         \
01274           DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);       \
01275           PUSH_FAILURE_POINTER (regstart[this_reg]);                    \
01276                                                                         \
01277           DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);           \
01278           PUSH_FAILURE_POINTER (regend[this_reg]);                      \
01279                                                                         \
01280           DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);  \
01281           DEBUG_PRINT2 (" match_null=%d",                               \
01282                         REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
01283           DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
01284           DEBUG_PRINT2 (" matched_something=%d",                        \
01285                         MATCHED_SOMETHING (reg_info[this_reg]));        \
01286           DEBUG_PRINT2 (" ever_matched=%d",                             \
01287                         EVER_MATCHED_SOMETHING (reg_info[this_reg]));   \
01288           DEBUG_PRINT1 ("\n");                                          \
01289           PUSH_FAILURE_ELT (reg_info[this_reg].word);                   \
01290         }                                                               \
01291                                                                         \
01292     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
01293     PUSH_FAILURE_INT (lowest_active_reg);                               \
01294                                                                         \
01295     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
01296     PUSH_FAILURE_INT (highest_active_reg);                              \
01297                                                                         \
01298     DEBUG_PRINT2 ("  Pushing pattern 0x%x:\n", pattern_place);          \
01299     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
01300     PUSH_FAILURE_POINTER (pattern_place);                               \
01301                                                                         \
01302     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
01303     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
01304                                  size2);                                \
01305     DEBUG_PRINT1 ("'\n");                                               \
01306     PUSH_FAILURE_POINTER (string_place);                                \
01307                                                                         \
01308     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
01309     DEBUG_PUSH (failure_id);                                            \
01310   } while (0)
01311 
01312 /* This is the number of items that are pushed and popped on the stack
01313    for each register.  */
01314 #define NUM_REG_ITEMS  3
01315 
01316 /* Individual items aside from the registers.  */
01317 #ifdef DEBUG
01318 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
01319 #else
01320 #define NUM_NONREG_ITEMS 4
01321 #endif
01322 
01323 /* We push at most this many items on the stack.  */
01324 /* We used to use (num_regs - 1), which is the number of registers
01325    this regexp will save; but that was changed to 5
01326    to avoid stack overflow for a regexp with lots of parens.  */
01327 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
01328 
01329 /* We actually push this many items.  */
01330 #define NUM_FAILURE_ITEMS                               \
01331   (((0                                                  \
01332      ? 0 : highest_active_reg - lowest_active_reg + 1)  \
01333     * NUM_REG_ITEMS)                                    \
01334    + NUM_NONREG_ITEMS)
01335 
01336 /* How many items can still be added to the stack without overflowing it.  */
01337 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01338 
01339 
01340 /* Pops what PUSH_FAIL_STACK pushes.
01341 
01342    We restore into the parameters, all of which should be lvalues:
01343      STR -- the saved data position.
01344      PAT -- the saved pattern position.
01345      LOW_REG, HIGH_REG -- the highest and lowest active registers.
01346      REGSTART, REGEND -- arrays of string positions.
01347      REG_INFO -- array of information about each subexpression.
01348 
01349    Also assumes the variables `fail_stack' and (if debugging), `bufp',
01350    `pend', `string1', `size1', `string2', and `size2'.  */
01351 
01352 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
01353 {                                                                       \
01354   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
01355   s_reg_t this_reg;                                                     \
01356   const unsigned char *string_temp;                                     \
01357                                                                         \
01358   assert (!FAIL_STACK_EMPTY ());                                        \
01359                                                                         \
01360   /* Remove failure points and point to how many regs pushed.  */       \
01361   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
01362   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
01363   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
01364                                                                         \
01365   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
01366                                                                         \
01367   DEBUG_POP (&failure_id);                                              \
01368   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
01369                                                                         \
01370   /* If the saved string location is NULL, it came from an              \
01371      on_failure_keep_string_jump opcode, and we want to throw away the  \
01372      saved NULL, thus retaining our current position in the string.  */ \
01373   string_temp = POP_FAILURE_POINTER ();                                 \
01374   if (string_temp != NULL)                                              \
01375     str = (const char *) string_temp;                                   \
01376                                                                         \
01377   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
01378   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
01379   DEBUG_PRINT1 ("'\n");                                                 \
01380                                                                         \
01381   pat = (unsigned char *) POP_FAILURE_POINTER ();                       \
01382   DEBUG_PRINT2 ("  Popping pattern 0x%x:\n", pat);                      \
01383   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
01384                                                                         \
01385   /* Restore register info.  */                                         \
01386   high_reg = (active_reg_t) POP_FAILURE_INT ();                         \
01387   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
01388                                                                         \
01389   low_reg = (active_reg_t) POP_FAILURE_INT ();                          \
01390   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
01391                                                                         \
01392   if (1)                                                                \
01393     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)          \
01394       {                                                                 \
01395         DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);               \
01396                                                                         \
01397         reg_info[this_reg].word = POP_FAILURE_ELT ();                   \
01398         DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);        \
01399                                                                         \
01400         regend[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
01401         DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);           \
01402                                                                         \
01403         regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();     \
01404         DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);       \
01405       }                                                                 \
01406   else                                                                  \
01407     {                                                                   \
01408       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
01409         {                                                               \
01410           reg_info[this_reg].word.integer = 0;                          \
01411           regend[this_reg] = 0;                                         \
01412           regstart[this_reg] = 0;                                       \
01413         }                                                               \
01414       highest_active_reg = high_reg;                                    \
01415     }                                                                   \
01416                                                                         \
01417   set_regs_matched_done = 0;                                            \
01418   DEBUG_STATEMENT (nfailure_points_popped++);                           \
01419 } /* POP_FAILURE_POINT */
01420 
01421 
01422 
01423 /* Structure for per-register (a.k.a. per-group) information.
01424    Other register information, such as the
01425    starting and ending positions (which are addresses), and the list of
01426    inner groups (which is a bits list) are maintained in separate
01427    variables.
01428 
01429    We are making a (strictly speaking) nonportable assumption here: that
01430    the compiler will pack our bit fields into something that fits into
01431    the type of `word', i.e., is something that fits into one item on the
01432    failure stack.  */
01433 
01434 
01435 /* Declarations and macros for re_match_2.  */
01436 
01437 typedef union
01438 {
01439     fail_stack_elt_t word;
01440     struct
01441     {
01442         /* This field is one if this group can match the empty string,
01443            zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
01444 #define MATCH_NULL_UNSET_VALUE 3
01445         unsigned match_null_string_p : 2;
01446         unsigned is_active : 1;
01447         unsigned matched_something : 1;
01448         unsigned ever_matched_something : 1;
01449     } bits;
01450 } register_info_type;
01451 
01452 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
01453 #define IS_ACTIVE(R)  ((R).bits.is_active)
01454 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
01455 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
01456 
01457 
01458 /* Call this when have matched a real character; it sets `matched' flags
01459    for the subexpressions which we are currently inside.  Also records
01460    that those subexprs have matched.  */
01461 #define SET_REGS_MATCHED()                                              \
01462   do                                                                    \
01463     {                                                                   \
01464       if (!set_regs_matched_done)                                       \
01465         {                                                               \
01466           active_reg_t r;                                               \
01467           set_regs_matched_done = 1;                                    \
01468           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
01469             {                                                           \
01470               MATCHED_SOMETHING (reg_info[r])                           \
01471                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
01472                 = 1;                                                    \
01473             }                                                           \
01474         }                                                               \
01475     }                                                                   \
01476   while (0)
01477 
01478 /* Registers are set to a sentinel when they haven't yet matched.  */
01479 static char reg_unset_dummy;
01480 #define REG_UNSET_VALUE (&reg_unset_dummy)
01481 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01482 
01483 /* Subroutine declarations and macros for regex_compile.  */
01484 
01485 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
01486                                               reg_syntax_t syntax,
01487                                               struct re_pattern_buffer *bufp));
01488 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
01489 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01490                                  int arg1, int arg2));
01491 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01492                                   int arg, unsigned char *end));
01493 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
01494                                   int arg1, int arg2, unsigned char *end));
01495 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
01496                                            reg_syntax_t syntax));
01497 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
01498                                            reg_syntax_t syntax));
01499 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
01500                                               const char *pend,
01501                                               char *translate,
01502                                               reg_syntax_t syntax,
01503                                               unsigned char *b));
01504 
01505 /* Fetch the next character in the uncompiled pattern---translating it
01506    if necessary.  Also cast from a signed character in the constant
01507    string passed to us by the user to an unsigned char that we can use
01508    as an array index (in, e.g., `translate').  */
01509 #ifndef PATFETCH
01510 #define PATFETCH(c)                                                     \
01511   do {if (p == pend) return REG_EEND;                                   \
01512     c = (unsigned char) *p++;                                           \
01513     if (translate) c = (unsigned char) translate[c];                    \
01514   } while (0)
01515 #endif
01516 
01517 /* Fetch the next character in the uncompiled pattern, with no
01518    translation.  */
01519 #define PATFETCH_RAW(c)                                                 \
01520   do {if (p == pend) return REG_EEND;                                   \
01521     c = (unsigned char) *p++;                                           \
01522   } while (0)
01523 
01524 /* Go backwards one character in the pattern.  */
01525 #define PATUNFETCH p--
01526 
01527 
01528 /* If `translate' is non-null, return translate[D], else just D.  We
01529    cast the subscript to translate because some data is declared as
01530    `char *', to avoid warnings when a string constant is passed.  But
01531    when we use a character as a subscript we must make it unsigned.  */
01532 #ifndef TRANSLATE
01533 #define TRANSLATE(d) \
01534   (translate ? (char) translate[(unsigned char) (d)] : (d))
01535 #endif
01536 
01537 
01538 /* Macros for outputting the compiled pattern into `buffer'.  */
01539 
01540 /* If the buffer isn't allocated when it comes in, use this.  */
01541 #define INIT_BUF_SIZE  32
01542 
01543 /* Make sure we have at least N more bytes of space in buffer.  */
01544 #define GET_BUFFER_SPACE(n)                                             \
01545     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)  \
01546       EXTEND_BUFFER ()
01547 
01548 /* Make sure we have one more byte of buffer space and then add C to it.  */
01549 #define BUF_PUSH(c)                                                     \
01550   do {                                                                  \
01551     GET_BUFFER_SPACE (1);                                               \
01552     *b++ = (unsigned char) (c);                                         \
01553   } while (0)
01554 
01555 
01556 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
01557 #define BUF_PUSH_2(c1, c2)                                              \
01558   do {                                                                  \
01559     GET_BUFFER_SPACE (2);                                               \
01560     *b++ = (unsigned char) (c1);                                        \
01561     *b++ = (unsigned char) (c2);                                        \
01562   } while (0)
01563 
01564 
01565 /* As with BUF_PUSH_2, except for three bytes.  */
01566 #define BUF_PUSH_3(c1, c2, c3)                                          \
01567   do {                                                                  \
01568     GET_BUFFER_SPACE (3);                                               \
01569     *b++ = (unsigned char) (c1);                                        \
01570     *b++ = (unsigned char) (c2);                                        \
01571     *b++ = (unsigned char) (c3);                                        \
01572   } while (0)
01573 
01574 
01575 /* Store a jump with opcode OP at LOC to location TO.  We store a
01576    relative address offset by the three bytes the jump itself occupies.  */
01577 #define STORE_JUMP(op, loc, to) \
01578   store_op1 (op, loc, (int) ((to) - (loc) - 3))
01579 
01580 /* Likewise, for a two-argument jump.  */
01581 #define STORE_JUMP2(op, loc, to, arg) \
01582   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
01583 
01584 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
01585 #define INSERT_JUMP(op, loc, to) \
01586   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
01587 
01588 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
01589 #define INSERT_JUMP2(op, loc, to, arg) \
01590   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
01591 
01592 
01593 /* This is not an arbitrary limit: the arguments which represent offsets
01594    into the pattern are two bytes long.  So if 2^16 bytes turns out to
01595    be too small, many things would have to change.  */
01596 /* Any other compiler which, like MSC, has allocation limit below 2^16
01597    bytes will have to use approach similar to what was done below for
01598    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
01599    reallocating to 0 bytes.  Such thing is not going to work too well.
01600    You have been warned!!  */
01601 #if defined(_MSC_VER)  && !defined(_WIN32)
01602 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
01603    The REALLOC define eliminates a flurry of conversion warnings,
01604    but is not required. */
01605 #define MAX_BUF_SIZE  65500L
01606 #define REALLOC(p,s) realloc ((p), (size_t) (s))
01607 #else
01608 #define MAX_BUF_SIZE (1L << 16)
01609 #define REALLOC(p,s) realloc ((p), (s))
01610 #endif
01611 
01612 /* Extend the buffer by twice its current size via realloc and
01613    reset the pointers that pointed into the old block to point to the
01614    correct places in the new one.  If extending the buffer results in it
01615    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
01616 #define EXTEND_BUFFER()                                                 \
01617   do {                                                                  \
01618     unsigned char *old_buffer = bufp->buffer;                           \
01619     if (bufp->allocated == MAX_BUF_SIZE)                                \
01620       return REG_ESIZE;                                                 \
01621     bufp->allocated <<= 1;                                              \
01622     if (bufp->allocated > MAX_BUF_SIZE)                                 \
01623       bufp->allocated = MAX_BUF_SIZE;                                   \
01624     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
01625     if (bufp->buffer == NULL)                                           \
01626       return REG_ESPACE;                                                \
01627     /* If the buffer moved, move all the pointers into it.  */          \
01628     if (old_buffer != bufp->buffer)                                     \
01629       {                                                                 \
01630         b = (b - old_buffer) + bufp->buffer;                            \
01631         begalt = (begalt - old_buffer) + bufp->buffer;                  \
01632         if (fixup_alt_jump)                                             \
01633           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
01634         if (laststart)                                                  \
01635           laststart = (laststart - old_buffer) + bufp->buffer;          \
01636         if (pending_exact)                                              \
01637           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
01638       }                                                                 \
01639   } while (0)
01640 
01641 
01642 /* Since we have one byte reserved for the register number argument to
01643    {start,stop}_memory, the maximum number of groups we can report
01644    things about is what fits in that byte.  */
01645 #define MAX_REGNUM 255
01646 
01647 /* But patterns can have more than `MAX_REGNUM' registers.  We just
01648    ignore the excess.  */
01649 typedef unsigned regnum_t;
01650 
01651 
01652 /* Macros for the compile stack.  */
01653 
01654 /* Since offsets can go either forwards or backwards, this type needs to
01655    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
01656 /* int may be not enough when sizeof(int) == 2.  */
01657 typedef long pattern_offset_t;
01658 
01659 typedef struct
01660 {
01661     pattern_offset_t begalt_offset;
01662     pattern_offset_t fixup_alt_jump;
01663     pattern_offset_t inner_group_offset;
01664     pattern_offset_t laststart_offset;
01665     regnum_t regnum;
01666 } compile_stack_elt_t;
01667 
01668 
01669 typedef struct
01670 {
01671     compile_stack_elt_t *stack;
01672     unsigned size;
01673     unsigned avail;                     /* Offset of next open position.  */
01674 } compile_stack_type;
01675 
01676 
01677 #define INIT_COMPILE_STACK_SIZE 32
01678 
01679 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
01680 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
01681 
01682 /* The next available element.  */
01683 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
01684 
01685 
01686 /* Set the bit for character C in a list.  */
01687 #define SET_LIST_BIT(c)                               \
01688   (b[((unsigned char) (c)) / BYTEWIDTH]               \
01689    |= 1 << (((unsigned char) c) % BYTEWIDTH))
01690 
01691 
01692 /* Get the next unsigned number in the uncompiled pattern.  */
01693 #define GET_UNSIGNED_NUMBER(num)                                        \
01694   { if (p != pend)                                                      \
01695      {                                                                  \
01696        PATFETCH (c);                                                    \
01697        while (ISDIGIT (c))                                              \
01698          {                                                              \
01699            if (num < 0)                                                 \
01700               num = 0;                                                  \
01701            num = num * 10 + c - '0';                                    \
01702            if (p == pend)                                               \
01703               break;                                                    \
01704            PATFETCH (c);                                                \
01705          }                                                              \
01706        }                                                                \
01707     }
01708 
01709 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
01710 /* The GNU C library provides support for user-defined character classes
01711    and the functions from ISO C amendement 1.  */
01712 # ifdef CHARCLASS_NAME_MAX
01713 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
01714 # else
01715 /* This shouldn't happen but some implementation might still have this
01716    problem.  Use a reasonable default value.  */
01717 #  define CHAR_CLASS_MAX_LENGTH 256
01718 # endif
01719 
01720 # define IS_CHAR_CLASS(string) wctype (string)
01721 #else
01722 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
01723 
01724 # define IS_CHAR_CLASS(string)                                          \
01725    (STREQ (string, "alpha") || STREQ (string, "upper")                  \
01726     || STREQ (string, "lower") || STREQ (string, "digit")               \
01727     || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
01728     || STREQ (string, "space") || STREQ (string, "print")               \
01729     || STREQ (string, "punct") || STREQ (string, "graph")               \
01730     || STREQ (string, "cntrl") || STREQ (string, "blank"))
01731 #endif
01732 
01733 #ifndef MATCH_MAY_ALLOCATE
01734 
01735 /* If we cannot allocate large objects within re_match_2_internal,
01736    we make the fail stack and register vectors global.
01737    The fail stack, we grow to the maximum size when a regexp
01738    is compiled.
01739    The register vectors, we adjust in size each time we
01740    compile a regexp, according to the number of registers it needs.  */
01741 
01742 static fail_stack_type fail_stack;
01743 
01744 /* Size with which the following vectors are currently allocated.
01745    That is so we can make them bigger as needed,
01746    but never make them smaller.  */
01747 static int regs_allocated_size;
01748 
01749 static const char **     regstart, **     regend;
01750 static const char ** old_regstart, ** old_regend;
01751 static const char **best_regstart, **best_regend;
01752 static register_info_type *reg_info;
01753 static const char **reg_dummy;
01754 static register_info_type *reg_info_dummy;
01755 
01756 /* Make the register vectors big enough for NUM_REGS registers,
01757    but don't make them smaller.  */
01758 
01759 static
01760 regex_grow_registers (num_regs)
01761      int num_regs;
01762 {
01763     if (num_regs > regs_allocated_size)
01764         {
01765             RETALLOC_IF (regstart,       num_regs, const char *);
01766             RETALLOC_IF (regend,         num_regs, const char *);
01767             RETALLOC_IF (old_regstart, num_regs, const char *);
01768             RETALLOC_IF (old_regend,     num_regs, const char *);
01769             RETALLOC_IF (best_regstart, num_regs, const char *);
01770             RETALLOC_IF (best_regend,    num_regs, const char *);
01771             RETALLOC_IF (reg_info,       num_regs, register_info_type);
01772             RETALLOC_IF (reg_dummy,      num_regs, const char *);
01773             RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
01774 
01775             regs_allocated_size = num_regs;
01776         }
01777 }
01778 
01779 #endif /* not MATCH_MAY_ALLOCATE */
01780 
01781 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
01782                                                  compile_stack,
01783                                                  regnum_t regnum));
01784 
01785 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
01786    Returns one of error codes defined in `regex.h', or zero for success.
01787 
01788    Assumes the `allocated' (and perhaps `buffer') and `translate'
01789    fields are set in BUFP on entry.
01790 
01791    If it succeeds, results are put in BUFP (if it returns an error, the
01792    contents of BUFP are undefined):
01793      `buffer' is the compiled pattern;
01794      `syntax' is set to SYNTAX;
01795      `used' is set to the length of the compiled pattern;
01796      `fastmap_accurate' is zero;
01797      `re_nsub' is the number of subexpressions in PATTERN;
01798      `not_bol' and `not_eol' are zero;
01799 
01800    The `fastmap' and `newline_anchor' fields are neither
01801    examined nor set.  */
01802 
01803 /* Return, freeing storage we allocated.  */
01804 #define FREE_STACK_RETURN(value)                \
01805   return (free (compile_stack.stack), value)
01806 
01807 static reg_errcode_t
01808 regex_compile (const char *pattern,
01809                size_t size,
01810                reg_syntax_t syntax,
01811                struct re_pattern_buffer *bufp)
01812 {
01813     /* We fetch characters from PATTERN here.  Even though PATTERN is
01814        `char *' (i.e., signed), we declare these variables as unsigned, so
01815        they can be reliably used as array indices.  */
01816     register unsigned char c, c1;
01817 
01818     /* A random temporary spot in PATTERN.  */
01819     const char *p1;
01820 
01821     /* Points to the end of the buffer, where we should append.  */
01822     register unsigned char *b;
01823 
01824     /* Keeps track of unclosed groups.  */
01825     compile_stack_type compile_stack;
01826 
01827     /* Points to the current (ending) position in the pattern.  */
01828     const char *p = pattern;
01829     const char *pend = pattern + size;
01830 
01831   /* How to translate the characters in the pattern.  */
01832     RE_TRANSLATE_TYPE translate = bufp->translate;
01833 
01834     /* Address of the count-byte of the most recently inserted `exactn'
01835      command.  This makes it possible to tell if a new exact-match
01836      character can be added to that command or if the character requires
01837      a new `exactn' command.  */
01838     unsigned char *pending_exact = 0;
01839 
01840     /* Address of start of the most recently finished expression.
01841      This tells, e.g., postfix * where to find the start of its
01842      operand.  Reset at the beginning of groups and alternatives.  */
01843     unsigned char *laststart = 0;
01844 
01845     /* Address of beginning of regexp, or inside of last group.  */
01846     unsigned char *begalt;
01847 
01848     /* Place in the uncompiled pattern (i.e., the {) to
01849      which to go back if the interval is invalid.  */
01850     const char *beg_interval;
01851 
01852     /* Address of the place where a forward jump should go to the end of
01853      the containing expression.  Each alternative of an `or' -- except the
01854      last -- ends with a forward jump of this sort.  */
01855     unsigned char *fixup_alt_jump = 0;
01856 
01857     /* Counts open-groups as they are encountered.  Remembered for the
01858      matching close-group on the compile stack, so the same register
01859      number is put in the stop_memory as the start_memory.  */
01860     regnum_t regnum = 0;
01861 
01862 #ifdef DEBUG
01863     DEBUG_PRINT1 ("\nCompiling pattern: ");
01864     if (debug)
01865         {
01866             unsigned debug_count;
01867 
01868             for (debug_count = 0; debug_count < size; debug_count++)
01869                 putchar (pattern[debug_count]);
01870             putchar ('\n');
01871         }
01872 #endif /* DEBUG */
01873 
01874     /* Initialize the compile stack.  */
01875     compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01876     if (compile_stack.stack == NULL)
01877         return REG_ESPACE;
01878 
01879     compile_stack.size = INIT_COMPILE_STACK_SIZE;
01880     compile_stack.avail = 0;
01881 
01882   /* Initialize the pattern buffer.  */
01883     bufp->syntax = syntax;
01884     bufp->fastmap_accurate = 0;
01885     bufp->not_bol = bufp->not_eol = 0;
01886 
01887     /* Set `used' to zero, so that if we return an error, the pattern
01888      printer (for debugging) will think there's no pattern.  We reset it
01889      at the end.  */
01890     bufp->used = 0;
01891 
01892     /* Always count groups, whether or not bufp->no_sub is set.  */
01893     bufp->re_nsub = 0;
01894 
01895 #if !defined (emacs) && !defined (SYNTAX_TABLE)
01896     /* Initialize the syntax table.  */
01897     init_syntax_once ();
01898 #endif
01899 
01900     if (bufp->allocated == 0)
01901         {
01902             if (bufp->buffer)
01903                 { /* If zero allocated, but buffer is non-null, try to realloc
01904                      enough space.  This loses if buffer's address is bogus, but
01905                      that is the user's responsibility.  */
01906                     RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01907                 }
01908             else
01909                 { /* Caller did not allocate a buffer.  Do it for them.  */
01910                     bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01911                 }
01912             if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
01913 
01914             bufp->allocated = INIT_BUF_SIZE;
01915         }
01916 
01917     begalt = b = bufp->buffer;
01918 
01919   /* Loop through the uncompiled pattern until we're at the end.  */
01920     while (p != pend)
01921         {
01922             PATFETCH (c);
01923 
01924             switch (c)
01925                 {
01926                 case '^':
01927                     {
01928                         if (   /* If at start of pattern, it's an operator.  */
01929                             p == pattern + 1
01930                             /* If context independent, it's an operator.  */
01931                             || syntax & RE_CONTEXT_INDEP_ANCHORS
01932                             /* Otherwise, depends on what's come before.  */
01933                             || at_begline_loc_p (pattern, p, syntax))
01934                             BUF_PUSH (begline);
01935                         else
01936                             goto normal_char;
01937                     }
01938                     break;
01939 
01940 
01941                 case '$':
01942                     {
01943                         if (   /* If at end of pattern, it's an operator.  */
01944                             p == pend
01945                             /* If context independent, it's an operator.  */
01946                             || syntax & RE_CONTEXT_INDEP_ANCHORS
01947                             /* Otherwise, depends on what's next.  */
01948                             || at_endline_loc_p (p, pend, syntax))
01949                             BUF_PUSH (endline);
01950                         else
01951                             goto normal_char;
01952                     }
01953                     break;
01954 
01955 
01956                 case '+':
01957                 case '?':
01958                     if ((syntax & RE_BK_PLUS_QM)
01959                         || (syntax & RE_LIMITED_OPS))
01960                         goto normal_char;
01961                 handle_plus:
01962                 case '*':
01963                     /* If there is no previous pattern... */
01964                     if (!laststart)
01965                         {
01966                             if (syntax & RE_CONTEXT_INVALID_OPS)
01967                                 FREE_STACK_RETURN (REG_BADRPT);
01968                             else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01969                                 goto normal_char;
01970                         }
01971 
01972                     {
01973                         /* Are we optimizing this jump?  */
01974                         boolean keep_string_p = false;
01975 
01976                         /* 1 means zero (many) matches is allowed.  */
01977                         char zero_times_ok = 0, many_times_ok = 0;
01978 
01979                         /* If there is a sequence of repetition chars, collapse it
01980                            down to just one (the right one).  We can't combine
01981                            interval operators with these because of, e.g., `a{2}*',
01982                            which should only match an even number of `a's.  */
01983 
01984                         for (;;)
01985                             {
01986                                 zero_times_ok |= c != '+';
01987                                 many_times_ok |= c != '?';
01988 
01989                                 if (p == pend)
01990                                     break;
01991 
01992                                 PATFETCH (c);
01993 
01994                                 if (c == '*'
01995                                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01996                                     ;
01997 
01998                                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
01999                                     {
02000                                         if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02001 
02002                                         PATFETCH (c1);
02003                                         if (!(c1 == '+' || c1 == '?'))
02004                                             {
02005                                                 PATUNFETCH;
02006                                                 PATUNFETCH;
02007                                                 break;
02008                                             }
02009 
02010                                         c = c1;
02011                                     }
02012                                 else
02013                                     {
02014                                         PATUNFETCH;
02015                                         break;
02016                                     }
02017 
02018                                 /* If we get here, we found another repeat character.  */
02019                             }
02020 
02021                         /* Star, etc. applied to an empty pattern is equivalent
02022                            to an empty pattern.  */
02023                         if (!laststart)
02024                             break;
02025 
02026                         /* Now we know whether or not zero matches is allowed
02027                            and also whether or not two or more matches is allowed.  */
02028                         if (many_times_ok)
02029                             { /* More than one repetition is allowed, so put in at the
02030                                  end a backward relative jump from `b' to before the next
02031                                  jump we're going to put in below (which jumps from
02032                                  laststart to after this jump).
02033 
02034                                  But if we are at the `*' in the exact sequence `.*\n',
02035                                  insert an unconditional jump backwards to the .,
02036                                  instead of the beginning of the loop.  This way we only
02037                                  push a failure point once, instead of every time
02038                                  through the loop.  */
02039                                 assert (p - 1 > pattern);
02040 
02041                                 /* Allocate the space for the jump.  */
02042                                 GET_BUFFER_SPACE (3);
02043 
02044                                 /* We know we are not at the first character of the pattern,
02045                                    because laststart was nonzero.  And we've already
02046                                    incremented `p', by the way, to be the character after
02047                                    the `*'.  Do we have to do something analogous here
02048                                    for null bytes, because of RE_DOT_NOT_NULL?  */
02049                                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02050                                     && zero_times_ok
02051                                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02052                                     && !(syntax & RE_DOT_NEWLINE))
02053                                     { /* We have .*\n.  */
02054                                         STORE_JUMP (jump, b, laststart);
02055                                         keep_string_p = true;
02056                                     }
02057                                 else
02058                                     /* Anything else.  */
02059                                     STORE_JUMP (maybe_pop_jump, b, laststart - 3);
02060 
02061                                 /* We've added more stuff to the buffer.  */
02062                                 b += 3;
02063                             }
02064 
02065                         /* On failure, jump from laststart to b + 3, which will be the
02066                            end of the buffer after this jump is inserted.  */
02067                         GET_BUFFER_SPACE (3);
02068                         INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02069                                      : on_failure_jump,
02070                                      laststart, b + 3);
02071                         pending_exact = 0;
02072                         b += 3;
02073 
02074                         if (!zero_times_ok)
02075                             {
02076                                 /* At least one repetition is required, so insert a
02077                                    `dummy_failure_jump' before the initial
02078                                    `on_failure_jump' instruction of the loop. This
02079                                    effects a skip over that instruction the first time
02080                                    we hit that loop.  */
02081                                 GET_BUFFER_SPACE (3);
02082                                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
02083                                 b += 3;
02084                             }
02085                     }
02086                     break;
02087 
02088 
02089                 case '.':
02090                     laststart = b;
02091                     BUF_PUSH (anychar);
02092                     break;
02093 
02094 
02095                 case '[':
02096                     {
02097                         boolean had_char_class = false;
02098 
02099                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02100 
02101                         /* Ensure that we have enough space to push a charset: the
02102                            opcode, the length count, and the bitset; 34 bytes in all.  */
02103                         GET_BUFFER_SPACE (34);
02104 
02105                         laststart = b;
02106 
02107                         /* We test `*p == '^' twice, instead of using an if
02108                            statement, so we only need one BUF_PUSH.  */
02109                         BUF_PUSH (*p == '^' ? charset_not : charset);
02110                         if (*p == '^')
02111                             p++;
02112 
02113                         /* Remember the first position in the bracket expression.  */
02114                         p1 = p;
02115 
02116                         /* Push the number of bytes in the bitmap.  */
02117                         BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
02118 
02119                         /* Clear the whole map.  */
02120                         bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
02121 
02122                         /* charset_not matches newline according to a syntax bit.  */
02123                         if ((re_opcode_t) b[-2] == charset_not
02124                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02125                             SET_LIST_BIT ('\n');
02126 
02127                         /* Read in characters and ranges, setting map bits.  */
02128                         for (;;)
02129                             {
02130                                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02131 
02132                                 PATFETCH (c);
02133 
02134                                 /* \ might escape characters inside [...] and [^...].  */
02135                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02136                                     {
02137                                         if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02138 
02139                                         PATFETCH (c1);
02140                                         SET_LIST_BIT (c1);
02141                                         continue;
02142                                     }
02143 
02144                                 /* Could be the end of the bracket expression.  If it's
02145                                    not (i.e., when the bracket expression is `[]' so
02146                                    far), the ']' character bit gets set way below.  */
02147                                 if (c == ']' && p != p1 + 1)
02148                                     break;
02149 
02150                                 /* Look ahead to see if it's a range when the last thing
02151                                    was a character class.  */
02152                                 if (had_char_class && c == '-' && *p != ']')
02153                                     FREE_STACK_RETURN (REG_ERANGE);
02154 
02155                                 /* Look ahead to see if it's a range when the last thing
02156                                    was a character: if this is a hyphen not at the
02157                                    beginning or the end of a list, then it's the range
02158                                    operator.  */
02159                                 if (c == '-'
02160                                     && !(p - 2 >= pattern && p[-2] == '[')
02161                                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02162                                     && *p != ']')
02163                                     {
02164                                         reg_errcode_t ret
02165                                             = compile_range (&p, pend, translate, syntax, b);
02166                                         if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02167                                     }
02168 
02169                                 else if (p[0] == '-' && p[1] != ']')
02170                                     { /* This handles ranges made up of characters only.  */
02171                                         reg_errcode_t ret;
02172 
02173                                         /* Move past the `-'.  */
02174                                         PATFETCH (c1);
02175 
02176                                         ret = compile_range (&p, pend, translate, syntax, b);
02177                                         if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02178                                     }
02179 
02180                                 /* See if we're at the beginning of a possible character
02181                                    class.  */
02182 
02183                                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02184                                     { /* Leave room for the null.  */
02185                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
02186 
02187                                         PATFETCH (c);
02188                                         c1 = 0;
02189 
02190                                         /* If pattern is `[[:'.  */
02191                                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02192 
02193                                         for (;;)
02194                                             {
02195                                                 PATFETCH (c);
02196                                                 if (c == ':' || c == ']' || p == pend
02197                                                     || c1 == CHAR_CLASS_MAX_LENGTH)
02198                                                     break;
02199                                                 str[c1++] = c;
02200                                             }
02201                                         str[c1] = '\0';
02202 
02203                                         /* If isn't a word bracketed by `[:' and:`]':
02204                                            undo the ending character, the letters, and leave
02205                                            the leading `:' and `[' (but set bits for them).  */
02206                                         if (c == ':' && *p == ']')
02207                                             {
02208 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
02209                                                 boolean is_lower = STREQ (str, "lower");
02210                                                 boolean is_upper = STREQ (str, "upper");
02211                                                 wctype_t wt;
02212                                                 int ch;
02213 
02214                                                 wt = wctype (str);
02215                                                 if (wt == 0)
02216                                                     FREE_STACK_RETURN (REG_ECTYPE);
02217 
02218                                                 /* Throw away the ] at the end of the character
02219                                                    class.  */
02220                                                 PATFETCH (c);
02221 
02222                                                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02223 
02224                                                 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
02225                                                     {
02226                                                         if (iswctype (btowc (ch), wt))
02227                                                             SET_LIST_BIT (ch);
02228 
02229                                                         if (translate && (is_upper || is_lower)
02230                                                             && (ISUPPER (ch) || ISLOWER (ch)))
02231                                                             SET_LIST_BIT (ch);
02232                                                     }
02233 
02234                                                 had_char_class = true;
02235 #else
02236                                                 int ch;
02237                                                 boolean is_alnum = STREQ (str, "alnum");
02238                                                 boolean is_alpha = STREQ (str, "alpha");
02239                                                 boolean is_blank = STREQ (str, "blank");
02240                                                 boolean is_cntrl = STREQ (str, "cntrl");
02241                                                 boolean is_digit = STREQ (str, "digit");
02242                                                 boolean is_graph = STREQ (str, "graph");
02243                                                 boolean is_lower = STREQ (str, "lower");
02244                                                 boolean is_print = STREQ (str, "print");
02245                                                 boolean is_punct = STREQ (str, "punct");
02246                                                 boolean is_space = STREQ (str, "space");
02247                                                 boolean is_upper = STREQ (str, "upper");
02248                                                 boolean is_xdigit = STREQ (str, "xdigit");
02249 
02250                                                 if (!IS_CHAR_CLASS (str))
02251                                                     FREE_STACK_RETURN (REG_ECTYPE);
02252 
02253                                                 /* Throw away the ] at the end of the character
02254                                                    class.  */
02255                                                 PATFETCH (c);
02256 
02257                                                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02258 
02259                                                 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
02260                                                     {
02261                                                         /* This was split into 3 if's to
02262                                                            avoid an arbitrary limit in some compiler.  */
02263                                                         if (   (is_alnum  && ISALNUM (ch))
02264                                                                || (is_alpha  && ISALPHA (ch))
02265                                                                || (is_blank  && ISBLANK (ch))
02266                                                                || (is_cntrl  && ISCNTRL (ch)))
02267                                                             SET_LIST_BIT (ch);
02268                                                         if (   (is_digit  && ISDIGIT (ch))
02269                                                                || (is_graph  && ISGRAPH (ch))
02270                                                                || (is_lower  && ISLOWER (ch))
02271                                                                || (is_print  && ISPRINT (ch)))
02272                                                             SET_LIST_BIT (ch);
02273                                                         if (   (is_punct  && ISPUNCT (ch))
02274                                                                || (is_space  && ISSPACE (ch))
02275                                                                || (is_upper  && ISUPPER (ch))
02276                                                                || (is_xdigit && ISXDIGIT (ch)))
02277                                                             SET_LIST_BIT (ch);
02278                                                         if (   translate && (is_upper || is_lower)
02279                                                                && (ISUPPER (ch) || ISLOWER (ch)))
02280                                                             SET_LIST_BIT (ch);
02281                                                     }
02282                                                 had_char_class = true;
02283 #endif  /* libc || wctype.h */
02284                                             }
02285                                         else
02286                                             {
02287                                                 c1++;
02288                                                 while (c1--)
02289                                                     PATUNFETCH;
02290                                                 SET_LIST_BIT ('[');
02291                                                 SET_LIST_BIT (':');
02292                                                 had_char_class = false;
02293                                             }
02294                                     }
02295                                 else
02296                                     {
02297                                         had_char_class = false;
02298                                         SET_LIST_BIT (c);
02299                                     }
02300                             }
02301 
02302                         /* Discard any (non)matching list bytes that are all 0 at the
02303                            end of the map.  Decrease the map-length byte too.  */
02304                         while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
02305                             b[-1]--;
02306                         b += b[-1];
02307                     }
02308                     break;
02309 
02310 
02311                 case '(':
02312                     if (syntax & RE_NO_BK_PARENS)
02313                         goto handle_open;
02314                     else
02315                         goto normal_char;
02316 
02317 
02318                 case ')':
02319                     if (syntax & RE_NO_BK_PARENS)
02320                         goto handle_close;
02321                     else
02322                         goto normal_char;
02323 
02324 
02325                 case '\n':
02326                     if (syntax & RE_NEWLINE_ALT)
02327                         goto handle_alt;
02328                     else
02329                         goto normal_char;
02330 
02331 
02332                 case '|':
02333                     if (syntax & RE_NO_BK_VBAR)
02334                         goto handle_alt;
02335                     else
02336                         goto normal_char;
02337 
02338 
02339                 case '{':
02340                     if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
02341                         goto handle_interval;
02342                     else
02343                         goto normal_char;
02344 
02345 
02346                 case '\\':
02347                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02348 
02349                     /* Do not translate the character after the \, so that we can
02350                        distinguish, e.g., \B from \b, even if we normally would
02351                        translate, e.g., B to b.  */
02352                     PATFETCH_RAW (c);
02353 
02354                     switch (c)
02355                         {
02356                         case '(':
02357                             if (syntax & RE_NO_BK_PARENS)
02358                                 goto normal_backslash;
02359 
02360                         handle_open:
02361                             bufp->re_nsub++;
02362                             regnum++;
02363 
02364                             if (COMPILE_STACK_FULL)
02365                                 {
02366                                     RETALLOC (compile_stack.stack, compile_stack.size << 1,
02367                                               compile_stack_elt_t);
02368                                     if (compile_stack.stack == NULL) return REG_ESPACE;
02369 
02370                                     compile_stack.size <<= 1;
02371                                 }
02372 
02373                             /* These are the values to restore when we hit end of this
02374                                group.  They are all relative offsets, so that if the
02375                                whole pattern moves because of realloc, they will still
02376                                be valid.  */
02377                             COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
02378                             COMPILE_STACK_TOP.fixup_alt_jump
02379                                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
02380                             COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
02381                             COMPILE_STACK_TOP.regnum = regnum;
02382 
02383                             /* We will eventually replace the 0 with the number of
02384                                groups inner to this one.  But do not push a
02385                                start_memory for groups beyond the last one we can
02386                                represent in the compiled pattern.  */
02387                             if (regnum <= MAX_REGNUM)
02388                                 {
02389                                     COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
02390                                     BUF_PUSH_3 (start_memory, regnum, 0);
02391                                 }
02392 
02393                             compile_stack.avail++;
02394 
02395                             fixup_alt_jump = 0;
02396                             laststart = 0;
02397                             begalt = b;
02398                             /* If we've reached MAX_REGNUM groups, then this open
02399                                won't actually generate any code, so we'll have to
02400                                clear pending_exact explicitly.  */
02401                             pending_exact = 0;
02402                             break;
02403 
02404 
02405                         case ')':
02406                             if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
02407 
02408                             if (COMPILE_STACK_EMPTY) {
02409                                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02410                                     goto normal_backslash;
02411                                 else
02412                                     FREE_STACK_RETURN (REG_ERPAREN);
02413                             }
02414                         handle_close:
02415                             if (fixup_alt_jump)
02416                                 { /* Push a dummy failure point at the end of the
02417                                      alternative for a possible future
02418                                      `pop_failure_jump' to pop.  See comments at
02419                                      `push_dummy_failure' in `re_match_2'.  */
02420                                     BUF_PUSH (push_dummy_failure);
02421 
02422                                     /* We allocated space for this jump when we assigned
02423                                        to `fixup_alt_jump', in the `handle_alt' case below.  */
02424                                     STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
02425                                 }
02426 
02427                             /* See similar code for backslashed left paren above.  */
02428                             if (COMPILE_STACK_EMPTY) {
02429                                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02430                                     goto normal_char;
02431                                 else
02432                                     FREE_STACK_RETURN (REG_ERPAREN);
02433                             }
02434                             /* Since we just checked for an empty stack above, this
02435                                ``can't happen''.  */
02436                             assert (compile_stack.avail != 0);
02437                             {
02438                                 /* We don't just want to restore into `regnum', because
02439                                    later groups should continue to be numbered higher,
02440                                    as in `(ab)c(de)' -- the second group is #2.  */
02441                                 regnum_t this_group_regnum;
02442 
02443                                 compile_stack.avail--;
02444                                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
02445                                 fixup_alt_jump
02446                                     = COMPILE_STACK_TOP.fixup_alt_jump
02447                                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
02448                                     : 0;
02449                                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
02450                                 this_group_regnum = COMPILE_STACK_TOP.regnum;
02451                                 /* If we've reached MAX_REGNUM groups, then this open
02452                                    won't actually generate any code, so we'll have to
02453                                    clear pending_exact explicitly.  */
02454                                 pending_exact = 0;
02455 
02456                                 /* We're at the end of the group, so now we know how many
02457                                    groups were inside this one.  */
02458                                 if (this_group_regnum <= MAX_REGNUM)
02459                                     {
02460                                         unsigned char *inner_group_loc
02461                                             = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
02462 
02463                                         *inner_group_loc = regnum - this_group_regnum;
02464                                         BUF_PUSH_3 (stop_memory, this_group_regnum,
02465                                                     regnum - this_group_regnum);
02466                                     }
02467                             }
02468                             break;
02469 
02470 
02471                         case '|':                                       /* `\|'.  */
02472                             if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
02473                                 goto normal_backslash;
02474                         handle_alt:
02475                             if (syntax & RE_LIMITED_OPS)
02476                                 goto normal_char;
02477 
02478                             /* Insert before the previous alternative a jump which
02479                                jumps to this alternative if the former fails.  */
02480                             GET_BUFFER_SPACE (3);
02481                             INSERT_JUMP (on_failure_jump, begalt, b + 6);
02482                             pending_exact = 0;
02483                             b += 3;
02484 
02485                             /* The alternative before this one has a jump after it
02486                                which gets executed if it gets matched.  Adjust that
02487                                jump so it will jump to this alternative's analogous
02488                                jump (put in below, which in turn will jump to the next
02489                                (if any) alternative's such jump, etc.).  The last such
02490                                jump jumps to the correct final destination.  A picture:
02491                                _____ _____
02492                                |   | |   |
02493                                |   v |   v
02494                                a | b   | c
02495 
02496                                If we are at `b', then fixup_alt_jump right now points to a
02497                                three-byte space after `a'.  We'll put in the jump, set
02498                                fixup_alt_jump to right after `b', and leave behind three
02499                                bytes which we'll fill in when we get to after `c'.  */
02500 
02501                             if (fixup_alt_jump)
02502                                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02503 
02504                             /* Mark and leave space for a jump after this alternative,
02505                                to be filled in later either by next alternative or
02506                                when know we're at the end of a series of alternatives.  */
02507                             fixup_alt_jump = b;
02508                             GET_BUFFER_SPACE (3);
02509                             b += 3;
02510 
02511                             laststart = 0;
02512                             begalt = b;
02513                             break;
02514 
02515 
02516                         case '{':
02517                             /* If \{ is a literal.  */
02518                             if (!(syntax & RE_INTERVALS)
02519                                 /* If we're at `\{' and it's not the open-interval
02520                                    operator.  */
02521                                 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
02522                                 || (p - 2 == pattern  &&  p == pend))
02523                                 goto normal_backslash;
02524 
02525                         handle_interval:
02526                             {
02527                                 /* If got here, then the syntax allows intervals.  */
02528 
02529                                 /* At least (most) this many matches must be made.  */
02530                                 int lower_bound = -1, upper_bound = -1;
02531 
02532                                 beg_interval = p - 1;
02533 
02534                                 if (p == pend)
02535                                     {
02536                                         if (syntax & RE_NO_BK_BRACES)
02537                                             goto unfetch_interval;
02538                                         else
02539                                             FREE_STACK_RETURN (REG_EBRACE);
02540                                     }
02541 
02542                                 GET_UNSIGNED_NUMBER (lower_bound);
02543 
02544                                 if (c == ',')
02545                                     {
02546                                         GET_UNSIGNED_NUMBER (upper_bound);
02547                                         if (upper_bound < 0) upper_bound = RE_DUP_MAX;
02548                                     }
02549                                 else
02550                                     /* Interval such as `{1}' => match exactly once. */
02551                                     upper_bound = lower_bound;
02552 
02553                                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
02554                                     || lower_bound > upper_bound)
02555                                     {
02556                                         if (syntax & RE_NO_BK_BRACES)
02557                                             goto unfetch_interval;
02558                                         else
02559                                             FREE_STACK_RETURN (REG_BADBR);
02560                                     }
02561 
02562                                 if (!(syntax & RE_NO_BK_BRACES))
02563                                     {
02564                                         if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
02565 
02566                                         PATFETCH (c);
02567                                     }
02568 
02569                                 if (c != '}')
02570                                     {
02571                                         if (syntax & RE_NO_BK_BRACES)
02572                                             goto unfetch_interval;
02573                                         else
02574                                             FREE_STACK_RETURN (REG_BADBR);
02575                                     }
02576 
02577                                 /* We just parsed a valid interval.  */
02578 
02579                                 /* If it's invalid to have no preceding re.  */
02580                                 if (!laststart)
02581                                     {
02582                                         if (syntax & RE_CONTEXT_INVALID_OPS)
02583                                             FREE_STACK_RETURN (REG_BADRPT);
02584                                         else if (syntax & RE_CONTEXT_INDEP_OPS)
02585                                             laststart = b;
02586                                         else
02587                                             goto unfetch_interval;
02588                                     }
02589 
02590                                 /* If the upper bound is zero, don't want to succeed at
02591                                    all; jump from `laststart' to `b + 3', which will be
02592                                    the end of the buffer after we insert the jump.  */
02593                                 if (upper_bound == 0)
02594                                     {
02595                                         GET_BUFFER_SPACE (3);
02596                                         INSERT_JUMP (jump, laststart, b + 3);
02597                                         b += 3;
02598                                     }
02599 
02600                                 /* Otherwise, we have a nontrivial interval.  When
02601                                    we're all done, the pattern will look like:
02602                                    set_number_at <jump count> <upper bound>
02603                                    set_number_at <succeed_n count> <lower bound>
02604                                    succeed_n <after jump addr> <succeed_n count>
02605                                    <body of loop>
02606                                    jump_n <succeed_n addr> <jump count>
02607                                    (The upper bound and `jump_n' are omitted if
02608                                    `upper_bound' is 1, though.)  */
02609                                 else
02610                                     { /* If the upper bound is > 1, we need to insert
02611                                          more at the end of the loop.  */
02612                                         unsigned nbytes = 10 + (upper_bound > 1) * 10;
02613 
02614                                         GET_BUFFER_SPACE (nbytes);
02615 
02616                                         /* Initialize lower bound of the `succeed_n', even
02617                                            though it will be set during matching by its
02618                                            attendant `set_number_at' (inserted next),
02619                                            because `re_compile_fastmap' needs to know.
02620                                            Jump to the `jump_n' we might insert below.  */
02621                                         INSERT_JUMP2 (succeed_n, laststart,
02622                                                       b + 5 + (upper_bound > 1) * 5,
02623                                                       lower_bound);
02624                                         b += 5;
02625 
02626                                         /* Code to initialize the lower bound.  Insert
02627                                            before the `succeed_n'.  The `5' is the last two
02628                                            bytes of this `set_number_at', plus 3 bytes of
02629                                            the following `succeed_n'.  */
02630                                         insert_op2 (set_number_at, laststart, 5, lower_bound, b);
02631                                         b += 5;
02632 
02633                                         if (upper_bound > 1)
02634                                             { /* More than one repetition is allowed, so
02635                                                  append a backward jump to the `succeed_n'
02636                                                  that starts this interval.
02637 
02638                                                  When we've reached this during matching,
02639                                                  we'll have matched the interval once, so
02640                                                  jump back only `upper_bound - 1' times.  */
02641                                                 STORE_JUMP2 (jump_n, b, laststart + 5,
02642                                                              upper_bound - 1);
02643                                                 b += 5;
02644 
02645                                                 /* The location we want to set is the second
02646                                                    parameter of the `jump_n'; that is `b-2' as
02647                                                    an absolute address.  `laststart' will be
02648                                                    the `set_number_at' we're about to insert;
02649                                                    `laststart+3' the number to set, the source
02650                                                    for the relative address.  But we are
02651                                                    inserting into the middle of the pattern --
02652                                                    so everything is getting moved up by 5.
02653                                                    Conclusion: (b - 2) - (laststart + 3) + 5,
02654                                                    i.e., b - laststart.
02655 
02656                                                    We insert this at the beginning of the loop
02657                                                    so that if we fail during matching, we'll
02658                                                    reinitialize the bounds.  */
02659                                                 insert_op2 (set_number_at, laststart, b - laststart,
02660                                                             upper_bound - 1, b);
02661                                                 b += 5;
02662                                             }
02663                                     }
02664                                 pending_exact = 0;
02665                                 beg_interval = NULL;
02666                             }
02667                             break;
02668 
02669                         unfetch_interval:
02670                             /* If an invalid interval, match the characters as literals.  */
02671                             assert (beg_interval);
02672                             p = beg_interval;
02673                             beg_interval = NULL;
02674 
02675                             /* normal_char and normal_backslash need `c'.  */
02676                             PATFETCH (c);
02677 
02678                             if (!(syntax & RE_NO_BK_BRACES))
02679                                 {
02680                                     if (p > pattern  &&  p[-1] == '\\')
02681                                         goto normal_backslash;
02682                                 }
02683                             goto normal_char;
02684 
02685 #ifdef emacs
02686                             /* There is no way to specify the before_dot and after_dot
02687                                operators.  rms says this is ok.  --karl  */
02688                         case '=':
02689                             BUF_PUSH (at_dot);
02690                             break;
02691 
02692                         case 's':
02693                             laststart = b;
02694                             PATFETCH (c);
02695                             BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02696                             break;
02697 
02698                         case 'S':
02699                             laststart = b;
02700                             PATFETCH (c);
02701                             BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02702                             break;
02703 #endif /* emacs */
02704 
02705 
02706                         case 'w':
02707                             if (re_syntax_options & RE_NO_GNU_OPS)
02708                                 goto normal_char;
02709                             laststart = b;
02710                             BUF_PUSH (wordchar);
02711                             break;
02712 
02713 
02714                         case 'W':
02715                             if (re_syntax_options & RE_NO_GNU_OPS)
02716                                 goto normal_char;
02717                             laststart = b;
02718                             BUF_PUSH (notwordchar);
02719                             break;
02720 
02721 
02722                         case '<':
02723                             if (re_syntax_options & RE_NO_GNU_OPS)
02724                                 goto normal_char;
02725                             BUF_PUSH (wordbeg);
02726                             break;
02727 
02728                         case '>':
02729                             if (re_syntax_options & RE_NO_GNU_OPS)
02730                                 goto normal_char;
02731                             BUF_PUSH (wordend);
02732                             break;
02733 
02734                         case 'b':
02735                             if (re_syntax_options & RE_NO_GNU_OPS)
02736                                 goto normal_char;
02737                             BUF_PUSH (wordbound);
02738                             break;
02739 
02740                         case 'B':
02741                             if (re_syntax_options & RE_NO_GNU_OPS)
02742                                 goto normal_char;
02743                             BUF_PUSH (notwordbound);
02744                             break;
02745 
02746                         case '`':
02747                             if (re_syntax_options & RE_NO_GNU_OPS)
02748                                 goto normal_char;
02749                             BUF_PUSH (begbuf);
02750                             break;
02751 
02752                         case '\'':
02753                             if (re_syntax_options & RE_NO_GNU_OPS)
02754                                 goto normal_char;
02755                             BUF_PUSH (endbuf);
02756                             break;
02757 
02758                         case '1': case '2': case '3': case '4': case '5':
02759                         case '6': case '7': case '8': case '9':
02760                             if (syntax & RE_NO_BK_REFS)
02761                                 goto normal_char;
02762 
02763                             c1 = c - '0';
02764 
02765                             if (c1 > regnum)
02766                                 FREE_STACK_RETURN (REG_ESUBREG);
02767 
02768                             /* Can't back reference to a subexpression if inside of it.  */
02769                             if (group_in_compile_stack (compile_stack, (regnum_t) c1))
02770                                 goto normal_char;
02771 
02772                             laststart = b;
02773                             BUF_PUSH_2 (duplicate, c1);
02774                             break;
02775 
02776 
02777                         case '+':
02778                         case '?':
02779                             if (syntax & RE_BK_PLUS_QM)
02780                                 goto handle_plus;
02781                             else
02782                                 goto normal_backslash;
02783 
02784                         default:
02785                         normal_backslash:
02786                             /* You might think it would be useful for \ to mean
02787                                not to translate; but if we don't translate it
02788                                it will never match anything.  */
02789                             c = TRANSLATE (c);
02790                             goto normal_char;
02791                         }
02792                     break;
02793 
02794 
02795                 default:
02796                     /* Expects the character in `c'.  */
02797                 normal_char:
02798                     /* If no exactn currently being built.  */
02799                     if (!pending_exact
02800 
02801                         /* If last exactn not at current position.  */
02802                         || pending_exact + *pending_exact + 1 != b
02803 
02804                         /* We have only one byte following the exactn for the count.  */
02805                         || *pending_exact == (1 << BYTEWIDTH) - 1
02806 
02807                         /* If followed by a repetition operator.  */
02808                         || *p == '*' || *p == '^'
02809                         || ((syntax & RE_BK_PLUS_QM)
02810                             ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02811                             : (*p == '+' || *p == '?'))
02812                         || ((syntax & RE_INTERVALS)
02813                             && ((syntax & RE_NO_BK_BRACES)
02814                                 ? *p == '{'
02815                                 : (p[0] == '\\' && p[1] == '{'))))
02816                         {
02817                             /* Start building a new exactn.  */
02818 
02819                             laststart = b;
02820 
02821                             BUF_PUSH_2 (exactn, 0);
02822                             pending_exact = b - 1;
02823                         }
02824 
02825                     BUF_PUSH (c);
02826                     (*pending_exact)++;
02827                     break;
02828                 } /* switch (c) */
02829         } /* while p != pend */
02830 
02831 
02832     /* Through the pattern now.  */
02833 
02834     if (fixup_alt_jump)
02835         STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02836 
02837     if (!COMPILE_STACK_EMPTY)
02838         FREE_STACK_RETURN (REG_EPAREN);
02839 
02840   /* If we don't want backtracking, force success
02841      the first time we reach the end of the compiled pattern.  */
02842     if (syntax & RE_NO_POSIX_BACKTRACKING)
02843         BUF_PUSH (succeed);
02844 
02845     free (compile_stack.stack);
02846 
02847     /* We have succeeded; set the length of the buffer.  */
02848     bufp->used = b - bufp->buffer;
02849 
02850 #ifdef DEBUG
02851     if (debug)
02852         {
02853             DEBUG_PRINT1 ("\nCompiled pattern: \n");
02854             print_compiled_pattern (bufp);
02855         }
02856 #endif /* DEBUG */
02857 
02858 #ifndef MATCH_MAY_ALLOCATE
02859     /* Initialize the failure stack to the largest possible stack.  This
02860      isn't necessary unless we're trying to avoid calling alloca in
02861      the search and match routines.  */
02862     {
02863         int num_regs = bufp->re_nsub + 1;
02864 
02865         /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
02866            is strictly greater than re_max_failures, the largest possible stack
02867            is 2 * re_max_failures failure points.  */
02868         if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
02869             {
02870                 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
02871 
02872 #ifdef emacs
02873                 if (! fail_stack.stack)
02874                     fail_stack.stack
02875                         = (fail_stack_elt_t *) xmalloc (fail_stack.size
02876                                                         * sizeof (fail_stack_elt_t));
02877                 else
02878                     fail_stack.stack
02879                         = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
02880                                                          (fail_stack.size
02881                                                           * sizeof (fail_stack_elt_t)));
02882 #else /* not emacs */
02883                 if (! fail_stack.stack)
02884                     fail_stack.stack
02885                         = (fail_stack_elt_t *) malloc (fail_stack.size
02886                                                        * sizeof (fail_stack_elt_t));
02887                 else
02888                     fail_stack.stack
02889                         = (fail_stack_elt_t *) realloc (fail_stack.stack,
02890                                                         (fail_stack.size
02891                                                          * sizeof (fail_stack_elt_t)));
02892 #endif /* not emacs */
02893             }
02894 
02895         regex_grow_registers (num_regs);
02896     }
02897 #endif /* not MATCH_MAY_ALLOCATE */
02898 
02899     return REG_NOERROR;
02900 } /* regex_compile */
02901 
02902 /* Subroutines for `regex_compile'.  */
02903 
02904 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
02905 
02906 static void
02907 store_op1 (re_opcode_t op,
02908            unsigned char *loc,
02909            int arg)
02910 {
02911     *loc = (unsigned char) op;
02912     STORE_NUMBER (loc + 1, arg);
02913 }
02914 
02915 
02916 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
02917 
02918 static void
02919 store_op2(re_opcode_t op,
02920           unsigned char *loc,
02921           int arg1,
02922           int arg2)
02923 {
02924     *loc = (unsigned char) op;
02925     STORE_NUMBER (loc + 1, arg1);
02926     STORE_NUMBER (loc + 3, arg2);
02927 }
02928 
02929 
02930 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
02931    for OP followed by two-byte integer parameter ARG.  */
02932 
02933 static void
02934 insert_op1(re_opcode_t op,
02935            unsigned char *loc,
02936            int arg,
02937            unsigned char *end)
02938 {
02939     register unsigned char *pfrom = end;
02940     register unsigned char *pto = end + 3;
02941 
02942     while (pfrom != loc)
02943         *--pto = *--pfrom;
02944 
02945     store_op1 (op, loc, arg);
02946 }
02947 
02948 
02949 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
02950 
02951 static void
02952 insert_op2(re_opcode_t op,
02953            unsigned char *loc,
02954            int arg1,
02955            int arg2,
02956            unsigned char *end)
02957 {
02958     register unsigned char *pfrom = end;
02959     register unsigned char *pto = end + 5;
02960 
02961     while (pfrom != loc)
02962         *--pto = *--pfrom;
02963 
02964     store_op2 (op, loc, arg1, arg2);
02965 }
02966 
02967 
02968 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
02969    after an alternative or a begin-subexpression.  We assume there is at
02970    least one character before the ^.  */
02971 
02972 static boolean
02973 at_begline_loc_p(const char *pattern,
02974                  const char *p,
02975                  reg_syntax_t syntax)
02976 {
02977     const char *prev = p - 2;
02978     boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02979 
02980     return
02981         /* After a subexpression?  */
02982         (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
02983         /* After an alternative?  */
02984         || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
02985 }
02986 
02987 
02988 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
02989    at least one character after the $, i.e., `P < PEND'.  */
02990 
02991 static boolean
02992 at_endline_loc_p(const char *p,
02993                  const char *pend,
02994                  reg_syntax_t syntax)
02995 {
02996     const char *next = p;
02997     boolean next_backslash = *next == '\\';
02998     const char *next_next = p + 1 < pend ? p + 1 : 0;
02999 
03000     return
03001         /* Before a subexpression?  */
03002         (syntax & RE_NO_BK_PARENS ? *next == ')'
03003          : next_backslash && next_next && *next_next == ')')
03004         /* Before an alternative?  */
03005         || (syntax & RE_NO_BK_VBAR ? *next == '|'
03006             : next_backslash && next_next && *next_next == '|');
03007 }
03008 
03009 
03010 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
03011    false if it's not.  */
03012 
03013 static boolean
03014 group_in_compile_stack(compile_stack_type compile_stack,
03015                        regnum_t regnum)
03016 {
03017     int this_element;
03018 
03019     for (this_element = compile_stack.avail - 1;
03020          this_element >= 0;
03021          this_element--)
03022         if (compile_stack.stack[this_element].regnum == regnum)
03023             return true;
03024 
03025     return false;
03026 }
03027 
03028 
03029 /* Read the ending character of a range (in a bracket expression) from the
03030    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
03031    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
03032    Then we set the translation of all bits between the starting and
03033    ending characters (inclusive) in the compiled pattern B.
03034 
03035    Return an error code.
03036 
03037    We use these short variable names so we can use the same macros as
03038    `regex_compile' itself.  */
03039 
03040 static reg_errcode_t
03041 compile_range(const char **p_ptr,
03042               const char *pend,
03043               RE_TRANSLATE_TYPE translate,
03044               reg_syntax_t syntax,
03045               unsigned char *b)
03046 {
03047     unsigned this_char;
03048 
03049     const char *p = *p_ptr;
03050     unsigned int range_start, range_end;
03051 
03052     if (p == pend)
03053         return REG_ERANGE;
03054 
03055     /* Even though the pattern is a signed `char *', we need to fetch
03056        with unsigned char *'s; if the high bit of the pattern character
03057        is set, the range endpoints will be negative if we fetch using a
03058        signed char *.
03059 
03060        We also want to fetch the endpoints without translating them; the
03061        appropriate translation is done in the bit-setting loop below.  */
03062     /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
03063     range_start = ((const unsigned char *) p)[-2];
03064     range_end   = ((const unsigned char *) p)[0];
03065 
03066     /* Have to increment the pointer into the pattern string, so the
03067        caller isn't still at the ending character.  */
03068     (*p_ptr)++;
03069 
03070     /* If the start is after the end, the range is empty.  */
03071     if (range_start > range_end)
03072         return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
03073 
03074     /* Here we see why `this_char' has to be larger than an `unsigned
03075        char' -- the range is inclusive, so if `range_end' == 0xff
03076        (assuming 8-bit characters), we would otherwise go into an infinite
03077        loop, since all characters <= 0xff.  */
03078     for (this_char = range_start; this_char <= range_end; this_char++)
03079         {
03080             SET_LIST_BIT (TRANSLATE (this_char));
03081         }
03082 
03083     return REG_NOERROR;
03084 }
03085 
03086 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
03087    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
03088    characters can start a string that matches the pattern.  This fastmap
03089    is used by re_search to skip quickly over impossible starting points.
03090 
03091    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
03092    area as BUFP->fastmap.
03093 
03094    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
03095    the pattern buffer.
03096 
03097    Returns 0 if we succeed, -2 if an internal error.   */
03098 
03099 int
03100 re_compile_fastmap(struct re_pattern_buffer *bufp)
03101 {
03102     int j, k;
03103 #ifdef MATCH_MAY_ALLOCATE
03104     fail_stack_type fail_stack;
03105 #endif
03106 #ifndef REGEX_MALLOC
03107     char *destination;
03108 #endif
03109 
03110     register char *fastmap = bufp->fastmap;
03111     unsigned char *pattern = bufp->buffer;
03112     unsigned char *p = pattern;
03113     register unsigned char *pend = pattern + bufp->used;
03114 
03115 #ifdef REL_ALLOC
03116     /* This holds the pointer to the failure stack, when
03117        it is allocated relocatably.  */
03118     fail_stack_elt_t *failure_stack_ptr;
03119 #endif
03120 
03121     /* Assume that each path through the pattern can be null until
03122        proven otherwise.  We set this false at the bottom of switch
03123        statement, to which we get only if a particular path doesn't
03124        match the empty string.  */
03125     boolean path_can_be_null = true;
03126 
03127     /* We aren't doing a `succeed_n' to begin with.  */
03128     boolean succeed_n_p = false;
03129 
03130     assert (fastmap != NULL && p != NULL);
03131 
03132     INIT_FAIL_STACK ();
03133     bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
03134     bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
03135     bufp->can_be_null = 0;
03136 
03137     while (1)
03138         {
03139             if (p == pend || *p == succeed)
03140                 {
03141                     /* We have reached the (effective) end of pattern.  */
03142                     if (!FAIL_STACK_EMPTY ())
03143                         {
03144                             bufp->can_be_null |= path_can_be_null;
03145 
03146                             /* Reset for next path.  */
03147                             path_can_be_null = true;
03148 
03149                             p = fail_stack.stack[--fail_stack.avail].pointer;
03150 
03151                             continue;
03152                         }
03153                     else
03154                         break;
03155                 }
03156 
03157             /* We should never be about to go beyond the end of the pattern.  */
03158             assert (p < pend);
03159 
03160             switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
03161                 {
03162 
03163                     /* I guess the idea here is to simply not bother with a fastmap
03164                        if a backreference is used, since it's too hard to figure out
03165                        the fastmap for the corresponding group.  Setting
03166                        `can_be_null' stops `re_search_2' from using the fastmap, so
03167                        that is all we do.  */
03168                 case duplicate:
03169                     bufp->can_be_null = 1;
03170                     goto done;
03171 
03172 
03173                     /* Following are the cases which match a character.  These end
03174                        with `break'.  */
03175 
03176                 case exactn:
03177                     fastmap[p[1]] = 1;
03178                     break;
03179 
03180 
03181                 case charset:
03182                     for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03183                         if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
03184                             fastmap[j] = 1;
03185                     break;
03186 
03187 
03188                 case charset_not:
03189                     /* Chars beyond end of map must be allowed.  */
03190                     for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
03191                         fastmap[j] = 1;
03192 
03193                     for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03194                         if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
03195                             fastmap[j] = 1;
03196                     break;
03197 
03198 
03199                 case wordchar:
03200                     for (j = 0; j < (1 << BYTEWIDTH); j++)
03201                         if (SYNTAX (j) == Sword)
03202                             fastmap[j] = 1;
03203                     break;
03204 
03205 
03206                 case notwordchar:
03207                     for (j = 0; j < (1 << BYTEWIDTH); j++)
03208                         if (SYNTAX (j) != Sword)
03209                             fastmap[j] = 1;
03210                     break;
03211 
03212 
03213                 case anychar:
03214                     {
03215                         int fastmap_newline = fastmap['\n'];
03216 
03217                         /* `.' matches anything ...  */
03218                         for (j = 0; j < (1 << BYTEWIDTH); j++)
03219                             fastmap[j] = 1;
03220 
03221                         /* ... except perhaps newline.  */
03222                         if (!(bufp->syntax & RE_DOT_NEWLINE))
03223                             fastmap['\n'] = fastmap_newline;
03224 
03225                         /* Return if we have already set `can_be_null'; if we have,
03226                            then the fastmap is irrelevant.  Something's wrong here.  */
03227                         else if (bufp->can_be_null)
03228                             goto done;
03229 
03230                         /* Otherwise, have to check alternative paths.  */
03231                         break;
03232                     }
03233 
03234 #ifdef emacs
03235                 case syntaxspec:
03236                     k = *p++;
03237                     for (j = 0; j < (1 << BYTEWIDTH); j++)
03238                         if (SYNTAX (j) == (enum syntaxcode) k)
03239                             fastmap[j] = 1;
03240                     break;
03241 
03242 
03243                 case notsyntaxspec:
03244                     k = *p++;
03245                     for (j = 0; j < (1 << BYTEWIDTH); j++)
03246                         if (SYNTAX (j) != (enum syntaxcode) k)
03247                             fastmap[j] = 1;
03248                     break;
03249 
03250 
03251                     /* All cases after this match the empty string.  These end with
03252                        `continue'.  */
03253 
03254 
03255                 case before_dot:
03256                 case at_dot:
03257                 case after_dot:
03258                     continue;
03259 #endif /* emacs */
03260 
03261 
03262                 case no_op:
03263                 case begline:
03264                 case endline:
03265                 case begbuf:
03266                 case endbuf:
03267                 case wordbound:
03268                 case notwordbound:
03269                 case wordbeg:
03270                 case wordend:
03271                 case push_dummy_failure:
03272                     continue;
03273 
03274 
03275                 case jump_n:
03276                 case pop_failure_jump:
03277                 case maybe_pop_jump:
03278                 case jump:
03279                 case jump_past_alt:
03280                 case dummy_failure_jump:
03281                     EXTRACT_NUMBER_AND_INCR (j, p);
03282                     p += j;
03283                     if (j > 0)
03284                         continue;
03285 
03286                     /* Jump backward implies we just went through the body of a
03287                        loop and matched nothing.  Opcode jumped to should be
03288                        `on_failure_jump' or `succeed_n'.  Just treat it like an
03289                        ordinary jump.  For a * loop, it has pushed its failure
03290                        point already; if so, discard that as redundant.  */
03291                     if ((re_opcode_t) *p != on_failure_jump
03292                         && (re_opcode_t) *p != succeed_n)
03293                         continue;
03294 
03295                     p++;
03296                     EXTRACT_NUMBER_AND_INCR (j, p);
03297                     p += j;
03298 
03299                     /* If what's on the stack is where we are now, pop it.  */
03300                     if (!FAIL_STACK_EMPTY ()
03301                         && fail_stack.stack[fail_stack.avail - 1].pointer == p)
03302                         fail_stack.avail--;
03303 
03304                     continue;
03305 
03306 
03307                 case on_failure_jump:
03308                 case on_failure_keep_string_jump:
03309                 handle_on_failure_jump:
03310                 EXTRACT_NUMBER_AND_INCR (j, p);
03311 
03312                 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
03313                    end of the pattern.  We don't want to push such a point,
03314                    since when we restore it above, entering the switch will
03315                    increment `p' past the end of the pattern.  We don't need
03316                    to push such a point since we obviously won't find any more
03317                    fastmap entries beyond `pend'.  Such a pattern can match
03318                    the null string, though.  */
03319                 if (p + j < pend)
03320                     {
03321                         if (!PUSH_PATTERN_OP (p + j, fail_stack))
03322                             {
03323                                 RESET_FAIL_STACK ();
03324                                 return -2;
03325                             }
03326                     }
03327                 else
03328                     bufp->can_be_null = 1;
03329 
03330                 if (succeed_n_p)
03331                     {
03332                         EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n.  */
03333                         succeed_n_p = false;
03334                     }
03335 
03336                 continue;
03337 
03338 
03339                 case succeed_n:
03340                     /* Get to the number of times to succeed.  */
03341                     p += 2;
03342 
03343                     /* Increment p past the n for when k != 0.  */
03344                     EXTRACT_NUMBER_AND_INCR (k, p);
03345                     if (k == 0)
03346                         {
03347                             p -= 4;
03348                             succeed_n_p = true;  /* Spaghetti code alert.  */
03349                             goto handle_on_failure_jump;
03350                         }
03351                     continue;
03352 
03353 
03354                 case set_number_at:
03355                     p += 4;
03356                     continue;
03357 
03358 
03359                 case start_memory:
03360                 case stop_memory:
03361                     p += 2;
03362                     continue;
03363 
03364 
03365                 default:
03366                     abort (); /* We have listed all the cases.  */
03367                 } /* switch *p++ */
03368 
03369             /* Getting here means we have found the possible starting
03370                characters for one path of the pattern -- and that the empty
03371                string does not match.  We need not follow this path further.
03372                Instead, look at the next alternative (remembered on the
03373                stack), or quit if no more.  The test at the top of the loop
03374                does these things.  */
03375             path_can_be_null = false;
03376             p = pend;
03377         } /* while p */
03378 
03379     /* Set `can_be_null' for the last path (also the first path, if the
03380        pattern is empty).  */
03381     bufp->can_be_null |= path_can_be_null;
03382 
03383  done:
03384     RESET_FAIL_STACK ();
03385     return 0;
03386 } /* re_compile_fastmap */
03387 
03388 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
03389    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
03390    this memory for recording register information.  STARTS and ENDS
03391    must be allocated using the malloc library routine, and must each
03392    be at least NUM_REGS * sizeof (regoff_t) bytes long.
03393 
03394    If NUM_REGS == 0, then subsequent matches should allocate their own
03395    register data.
03396 
03397    Unless this function is called, the first search or match using
03398    PATTERN_BUFFER will allocate its own register data, without
03399    freeing the old data.  */
03400 
03401 void
03402 re_set_registers(struct re_pattern_buffer *bufp,
03403                  struct re_registers *regs,
03404                  unsigned num_regs,
03405                  regoff_t *starts,
03406                  regoff_t *ends)
03407 {
03408     if (num_regs)
03409         {
03410             bufp->regs_allocated = REGS_REALLOCATE;
03411             regs->num_regs = num_regs;
03412             regs->start = starts;
03413             regs->end = ends;
03414         }
03415     else
03416         {
03417             bufp->regs_allocated = REGS_UNALLOCATED;
03418             regs->num_regs = 0;
03419             regs->start = regs->end = (regoff_t *) 0;
03420         }
03421 }
03422 
03423 /* Searching routines.  */
03424 
03425 /* Like re_search_2, below, but only one string is specified, and
03426    doesn't let you say where to stop matching. */
03427 
03428 int
03429 re_search(struct re_pattern_buffer *bufp,
03430           const char *string,
03431           int size,
03432           int startpos,
03433           int range,
03434           struct re_registers *regs)
03435 {
03436     return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
03437                         regs, size);
03438 }
03439 
03440 
03441 /* Using the compiled pattern in BUFP->buffer, first tries to match the
03442    virtual concatenation of STRING1 and STRING2, starting first at index
03443    STARTPOS, then at STARTPOS + 1, and so on.
03444 
03445    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
03446 
03447    RANGE is how far to scan while trying to match.  RANGE = 0 means try
03448    only at STARTPOS; in general, the last start tried is STARTPOS +
03449    RANGE.
03450 
03451    In REGS, return the indices of the virtual concatenation of STRING1
03452    and STRING2 that matched the entire BUFP->buffer and its contained
03453    subexpressions.
03454 
03455    Do not consider matching one past the index STOP in the virtual
03456    concatenation of STRING1 and STRING2.
03457 
03458    We return either the position in the strings at which the match was
03459    found, -1 if no match, or -2 if error (such as failure
03460    stack overflow).  */
03461 
03462 int
03463 re_search_2(struct re_pattern_buffer *bufp,
03464             const char *string1,
03465             int size1,
03466             const char *string2,
03467             int size2,
03468             int startpos,
03469             int range,
03470             struct re_registers *regs,
03471             int stop)
03472 {
03473     int val;
03474     register char *fastmap = bufp->fastmap;
03475     register RE_TRANSLATE_TYPE translate = bufp->translate;
03476     int total_size = size1 + size2;
03477     int endpos = startpos + range;
03478 
03479     /* Check for out-of-range STARTPOS.  */
03480     if (startpos < 0 || startpos > total_size)
03481         return -1;
03482 
03483     /* Fix up RANGE if it might eventually take us outside
03484        the virtual concatenation of STRING1 and STRING2.
03485        Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
03486     if (endpos < 0)
03487         range = 0 - startpos;
03488     else if (endpos > total_size)
03489         range = total_size - startpos;
03490 
03491     /* If the search isn't to be a backwards one, don't waste time in a
03492        search for a pattern that must be anchored.  */
03493     if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03494         {
03495             if (startpos > 0)
03496                 return -1;
03497             else
03498                 range = 1;
03499         }
03500 
03501 #ifdef emacs
03502     /* In a forward search for something that starts with \=.
03503        don't keep searching past point.  */
03504     if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
03505         {
03506             range = PT - startpos;
03507             if (range <= 0)
03508                 return -1;
03509         }
03510 #endif /* emacs */
03511 
03512     /* Update the fastmap now if not correct already.  */
03513     if (fastmap && !bufp->fastmap_accurate)
03514         if (re_compile_fastmap (bufp) == -2)
03515             return -2;
03516 
03517     /* Loop through the string, looking for a place to start matching.  */
03518     for (;;)
03519         {
03520             /* If a fastmap is supplied, skip quickly over characters that
03521                cannot be the start of a match.  If the pattern can match the
03522                null string, however, we don't need to skip characters; we want
03523                the first null string.  */
03524             if (fastmap && startpos < total_size && !bufp->can_be_null)
03525                 {
03526                     if (range > 0)      /* Searching forwards.  */
03527                         {
03528                             register const char *d;
03529                             register int lim = 0;
03530                             int irange = range;
03531 
03532                             if (startpos < size1 && startpos + range >= size1)
03533                                 lim = range - (size1 - startpos);
03534 
03535                             d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03536 
03537                             /* Written out as an if-else to avoid testing `translate'
03538                                inside the loop.  */
03539                             if (translate)
03540                                 while (range > lim
03541                                        && !fastmap[(unsigned char)
03542                                                   translate[(unsigned char) *d++]])
03543                                     range--;
03544                             else
03545                                 while (range > lim && !fastmap[(unsigned char) *d++])
03546                                     range--;
03547 
03548                             startpos += irange - range;
03549                         }
03550                     else                                /* Searching backwards.  */
03551                         {
03552                             register char c = (size1 == 0 || startpos >= size1
03553                                                ? string2[startpos - size1]
03554                                                : string1[startpos]);
03555 
03556                             if (!fastmap[(unsigned char) TRANSLATE (c)])
03557                                 goto advance;
03558                         }
03559                 }
03560 
03561             /* If can't match the null string, and that's all we have left, fail.  */
03562             if (range >= 0 && startpos == total_size && fastmap
03563                 && !bufp->can_be_null)
03564                 return -1;
03565 
03566             val = re_match_2_internal (bufp, string1, size1, string2, size2,
03567                                        startpos, regs, stop);
03568 #ifndef REGEX_MALLOC
03569 #ifdef C_ALLOCA
03570             alloca (0);
03571 #endif
03572 #endif
03573 
03574             if (val >= 0)
03575                 return startpos;
03576 
03577             if (val == -2)
03578                 return -2;
03579 
03580         advance:
03581             if (!range)
03582                 break;
03583             else if (range > 0)
03584                 {
03585                     range--;
03586                     startpos++;
03587                 }
03588             else
03589                 {
03590                     range++;
03591                     startpos--;
03592                 }
03593         }
03594     return -1;
03595 } /* re_search_2 */
03596 
03597 /* This converts PTR, a pointer into one of the search strings `string1'
03598    and `string2' into an offset from the beginning of that string.  */
03599 #define POINTER_TO_OFFSET(ptr)                  \
03600   (FIRST_STRING_P (ptr)                         \
03601    ? ((regoff_t) ((ptr) - string1))             \
03602    : ((regoff_t) ((ptr) - string2 + size1)))
03603 
03604 /* Macros for dealing with the split strings in re_match_2.  */
03605 
03606 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
03607 
03608 /* Call before fetching a character with *d.  This switches over to
03609    string2 if necessary.  */
03610 #define PREFETCH()                                                      \
03611   while (d == dend)                                                     \
03612     {                                                                   \
03613       /* End of string2 => fail.  */                                    \
03614       if (dend == end_match_2)                                          \
03615         goto fail;                                                      \
03616       /* End of string1 => advance to string2.  */                      \
03617       d = string2;                                                      \
03618       dend = end_match_2;                                               \
03619     }
03620 
03621 
03622 /* Test if at very beginning or at very end of the virtual concatenation
03623    of `string1' and `string2'.  If only one string, it's `string2'.  */
03624 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
03625 #define AT_STRINGS_END(d) ((d) == end2)
03626 
03627 
03628 /* Test if D points to a character which is word-constituent.  We have
03629    two special cases to check for: if past the end of string1, look at
03630    the first character in string2; and if before the beginning of
03631    string2, look at the last character in string1.  */
03632 #define WORDCHAR_P(d)                                                   \
03633   (SYNTAX ((d) == end1 ? *string2                                       \
03634            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
03635    == Sword)
03636 
03637 /* Disabled due to a compiler bug -- see comment at case wordbound */
03638 #if 0
03639 /* Test if the character before D and the one at D differ with respect
03640    to being word-constituent.  */
03641 #define AT_WORD_BOUNDARY(d)                                             \
03642   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
03643    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
03644 #endif
03645 
03646 /* Free everything we malloc.  */
03647 #ifdef MATCH_MAY_ALLOCATE
03648 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
03649 #define FREE_VARIABLES()                                                \
03650   do {                                                                  \
03651     REGEX_FREE_STACK (fail_stack.stack);                                \
03652     FREE_VAR ((void*) regstart);                                        \
03653     FREE_VAR ((void*) regend);                                          \
03654     FREE_VAR ((void*) old_regstart);                                    \
03655     FREE_VAR ((void*) old_regend);                                      \
03656     FREE_VAR ((void*) best_regstart);                                   \
03657     FREE_VAR ((void*) best_regend);                                     \
03658     FREE_VAR ((void*) reg_info);                                        \
03659     FREE_VAR ((void*) reg_dummy);                                       \
03660     FREE_VAR ((void*) reg_info_dummy);                                  \
03661   } while (0)
03662 #else
03663 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
03664 #endif /* not MATCH_MAY_ALLOCATE */
03665 
03666 /* These values must meet several constraints.  They must not be valid
03667    register values; since we have a limit of 255 registers (because
03668    we use only one byte in the pattern for the register number), we can
03669    use numbers larger than 255.  They must differ by 1, because of
03670    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
03671    be larger than the value for the highest register, so we do not try
03672    to actually save any registers when none are active.  */
03673 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
03674 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03675 
03676 /* Matching routines.  */
03677 
03678 #ifndef emacs   /* Emacs never uses this.  */
03679 /* re_match is like re_match_2 except it takes only a single string.  */
03680 
03681 int
03682 re_match(struct re_pattern_buffer *bufp,
03683          const char *string,
03684          int size,
03685          int pos,
03686          struct re_registers *regs)
03687 {
03688     int result = re_match_2_internal (bufp, NULL, 0, string, size,
03689                                       pos, regs, size);
03690 #ifndef REGEX_MALLOC
03691 #ifdef C_ALLOCA
03692     alloca (0);
03693 #endif
03694 #endif
03695     return result;
03696 }
03697 #endif /* not emacs */
03698 
03699 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
03700                                                     unsigned char *end,
03701                                                     register_info_type *reg_info));
03702 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
03703                                                   unsigned char *end,
03704                                                   register_info_type *reg_info));
03705 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
03706                                                         unsigned char *end,
03707                                                         register_info_type *reg_info));
03708 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
03709                                      int len, char *translate));
03710 
03711 /* re_match_2 matches the compiled pattern in BUFP against the
03712    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
03713    and SIZE2, respectively).  We start matching at POS, and stop
03714    matching at STOP.
03715 
03716    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
03717    store offsets for the substring each group matched in REGS.  See the
03718    documentation for exactly how many groups we fill.
03719 
03720    We return -1 if no match, -2 if an internal error (such as the
03721    failure stack overflowing).  Otherwise, we return the length of the
03722    matched substring.  */
03723 
03724 int
03725 re_match_2(struct re_pattern_buffer *bufp,
03726            const char *string1,
03727            int size1,
03728            const char *string2,
03729            int size2,
03730            int pos,
03731            struct re_registers *regs,
03732            int stop)
03733 {
03734     int result = re_match_2_internal (bufp, string1, size1, string2, size2,
03735                                       pos, regs, stop);
03736 #ifndef REGEX_MALLOC
03737 #ifdef C_ALLOCA
03738     alloca (0);
03739 #endif
03740 #endif
03741     return result;
03742 }
03743 
03744 /* This is a separate function so that we can force an alloca cleanup
03745    afterwards.  */
03746 static int
03747 re_match_2_internal(struct re_pattern_buffer *bufp,
03748                     const char *string1,
03749                     int size1,
03750                     const char *string2,
03751                     int size2,
03752                     int pos,
03753                     struct re_registers *regs,
03754                     int stop)
03755 {
03756     /* General temporaries.  */
03757     int mcnt;
03758     unsigned char *p1;
03759 
03760     /* Just past the end of the corresponding string.  */
03761     const char *end1, *end2;
03762 
03763     /* Pointers into string1 and string2, just past the last characters in
03764      each to consider matching.  */
03765     const char *end_match_1, *end_match_2;
03766 
03767     /* Where we are in the data, and the end of the current string.  */
03768     const char *d, *dend;
03769 
03770     /* Where we are in the pattern, and the end of the pattern.  */
03771     unsigned char *p = bufp->buffer;
03772     register unsigned char *pend = p + bufp->used;
03773 
03774   /* Mark the opcode just after a start_memory, so we can test for an
03775      empty subpattern when we get to the stop_memory.  */
03776     unsigned char *just_past_start_mem = 0;
03777 
03778     /* We use this to map every character in the string.  */
03779     RE_TRANSLATE_TYPE translate = bufp->translate;
03780 
03781     /* Failure point stack.  Each place that can handle a failure further
03782      down the line pushes a failure point on this stack.  It consists of
03783      restart, regend, and reg_info for all registers corresponding to
03784      the subexpressions we're currently inside, plus the number of such
03785      registers, and, finally, two char *'s.  The first char * is where
03786      to resume scanning the pattern; the second one is where to resume
03787      scanning the strings.  If the latter is zero, the failure point is
03788      a ``dummy''; if a failure happens and the failure point is a dummy,
03789      it gets discarded and the next next one is tried.  */
03790 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03791     fail_stack_type fail_stack;
03792 #endif
03793 #ifdef DEBUG
03794     static unsigned failure_id = 0;
03795     unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03796 #endif
03797 
03798 #ifdef REL_ALLOC
03799     /* This holds the pointer to the failure stack, when
03800      it is allocated relocatably.  */
03801     fail_stack_elt_t *failure_stack_ptr;
03802 #endif
03803 
03804     /* We fill all the registers internally, independent of what we
03805      return, for use in backreferences.  The number here includes
03806      an element for register zero.  */
03807     size_t num_regs = bufp->re_nsub + 1;
03808 
03809     /* The currently active registers.  */
03810     active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03811     active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03812 
03813   /* Information on the contents of registers. These are pointers into
03814      the input strings; they record just what was matched (on this
03815      attempt) by a subexpression part of the pattern, that is, the
03816      regnum-th regstart pointer points to where in the pattern we began
03817      matching and the regnum-th regend points to right after where we
03818      stopped matching the regnum-th subexpression.  (The zeroth register
03819      keeps track of what the whole pattern matches.)  */
03820 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03821     const char **regstart, **regend;
03822 #endif
03823 
03824     /* If a group that's operated upon by a repetition operator fails to
03825      match anything, then the register for its start will need to be
03826      restored because it will have been set to wherever in the string we
03827      are when we last see its open-group operator.  Similarly for a
03828      register's end.  */
03829 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03830     const char **old_regstart, **old_regend;
03831 #endif
03832 
03833     /* The is_active field of reg_info helps us keep track of which (possibly
03834      nested) subexpressions we are currently in. The matched_something
03835      field of reg_info[reg_num] helps us tell whether or not we have
03836      matched any of the pattern so far this time through the reg_num-th
03837      subexpression.  These two fields get reset each time through any
03838      loop their register is in.  */
03839 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
03840     register_info_type *reg_info;
03841 #endif
03842 
03843     /* The following record the register info as found in the above
03844      variables when we find a match better than any we've seen before.
03845      This happens as we backtrack through the failure points, which in
03846      turn happens only if we have not yet matched the entire string. */
03847     unsigned best_regs_set = false;
03848 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03849     const char **best_regstart, **best_regend;
03850 #endif
03851 
03852     /* Logically, this is `best_regend[0]'.  But we don't want to have to
03853      allocate space for that if we're not allocating space for anything
03854      else (see below).  Also, we never need info about register 0 for
03855      any of the other register vectors, and it seems rather a kludge to
03856      treat `best_regend' differently than the rest.  So we keep track of
03857      the end of the best match so far in a separate variable.  We
03858      initialize this to NULL so that when we backtrack the first time
03859      and need to test it, it's not garbage.  */
03860     const char *match_end = NULL;
03861 
03862     /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
03863     int set_regs_matched_done = 0;
03864 
03865     /* Used when we pop values we don't care about.  */
03866 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
03867     const char **reg_dummy;
03868     register_info_type *reg_info_dummy;
03869 #endif
03870 
03871 #ifdef DEBUG
03872     /* Counts the total number of registers pushed.  */
03873     unsigned num_regs_pushed = 0;
03874 #endif
03875 
03876     DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03877 
03878     INIT_FAIL_STACK ();
03879 
03880 #ifdef MATCH_MAY_ALLOCATE
03881     /* Do not bother to initialize all the register variables if there are
03882      no groups in the pattern, as it takes a fair amount of time.  If
03883      there are groups, we include space for register 0 (the whole
03884      pattern), even though we never use it, since it simplifies the
03885      array indexing.  We should fix this.  */
03886     if (bufp->re_nsub)
03887         {
03888             regstart = REGEX_TALLOC (num_regs, const char *);
03889             regend = REGEX_TALLOC (num_regs, const char *);
03890             old_regstart = REGEX_TALLOC (num_regs, const char *);
03891             old_regend = REGEX_TALLOC (num_regs, const char *);
03892             best_regstart = REGEX_TALLOC (num_regs, const char *);
03893             best_regend = REGEX_TALLOC (num_regs, const char *);
03894             reg_info = REGEX_TALLOC (num_regs, register_info_type);
03895             reg_dummy = REGEX_TALLOC (num_regs, const char *);
03896             reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03897 
03898             if (!(regstart && regend && old_regstart && old_regend && reg_info
03899                   && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03900                 {
03901                     FREE_VARIABLES ();
03902                     return -2;
03903                 }
03904         }
03905     else
03906         {
03907             /* We must initialize all our variables to NULL, so that
03908                `FREE_VARIABLES' doesn't try to free them.  */
03909             regstart = regend = old_regstart = old_regend = best_regstart
03910                 = best_regend = reg_dummy = NULL;
03911             reg_info = reg_info_dummy = (register_info_type *) NULL;
03912         }
03913 #endif /* MATCH_MAY_ALLOCATE */
03914 
03915     /* The starting position is bogus.  */
03916     if (pos < 0 || pos > size1 + size2)
03917         {
03918             FREE_VARIABLES ();
03919             return -1;
03920         }
03921 
03922     /* Initialize subexpression text positions to -1 to mark ones that no
03923      start_memory/stop_memory has been seen for. Also initialize the
03924      register information struct.  */
03925     for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
03926         {
03927             regstart[mcnt] = regend[mcnt]
03928                 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03929 
03930             REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03931             IS_ACTIVE (reg_info[mcnt]) = 0;
03932             MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03933             EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03934         }
03935 
03936     /* We move `string1' into `string2' if the latter's empty -- but not if
03937      `string1' is null.  */
03938     if (size2 == 0 && string1 != NULL)
03939         {
03940             string2 = string1;
03941             size2 = size1;
03942             string1 = 0;
03943             size1 = 0;
03944         }
03945     end1 = string1 + size1;
03946     end2 = string2 + size2;
03947 
03948     /* Compute where to stop matching, within the two strings.  */
03949     if (stop <= size1)
03950         {
03951             end_match_1 = string1 + stop;
03952             end_match_2 = string2;
03953         }
03954     else
03955         {
03956             end_match_1 = end1;
03957             end_match_2 = string2 + stop - size1;
03958         }
03959 
03960     /* `p' scans through the pattern as `d' scans through the data.
03961      `dend' is the end of the input string that `d' points within.  `d'
03962      is advanced into the following input string whenever necessary, but
03963      this happens before fetching; therefore, at the beginning of the
03964      loop, `d' can be pointing at the end of a string, but it cannot
03965      equal `string2'.  */
03966     if (size1 > 0 && pos <= size1)
03967         {
03968             d = string1 + pos;
03969             dend = end_match_1;
03970         }
03971     else
03972         {
03973             d = string2 + pos - size1;
03974             dend = end_match_2;
03975         }
03976 
03977     DEBUG_PRINT1 ("The compiled pattern is:\n");
03978     DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03979     DEBUG_PRINT1 ("The string to match is: `");
03980     DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03981     DEBUG_PRINT1 ("'\n");
03982 
03983   /* This loops over pattern commands.  It exits by returning from the
03984      function if the match is complete, or it drops through if the match
03985      fails at this starting point in the input data.  */
03986     for (;;)
03987         {
03988 #ifdef _LIBC
03989             DEBUG_PRINT2 ("\n%p: ", p);
03990 #else
03991             DEBUG_PRINT2 ("\n0x%x: ", p);
03992 #endif
03993 
03994             if (p == pend)
03995                 { /* End of pattern means we might have succeeded.  */
03996                     DEBUG_PRINT1 ("end of pattern ... ");
03997 
03998                     /* If we haven't matched the entire string, and we want the
03999                        longest match, try backtracking.  */
04000                     if (d != end_match_2)
04001                         {
04002                             /* 1 if this match ends in the same string (string1 or string2)
04003                                as the best previous match.  */
04004                             boolean same_str_p = (FIRST_STRING_P (match_end)
04005                                                   == MATCHING_IN_FIRST_STRING);
04006                             /* 1 if this match is the best seen so far.  */
04007                             boolean best_match_p;
04008 
04009                             /* AIX compiler got confused when this was combined
04010                                with the previous declaration.  */
04011                             if (same_str_p)
04012                                 best_match_p = d > match_end;
04013                             else
04014                                 best_match_p = !MATCHING_IN_FIRST_STRING;
04015 
04016                             DEBUG_PRINT1 ("backtracking.\n");
04017 
04018                             if (!FAIL_STACK_EMPTY ())
04019                                 { /* More failure points to try.  */
04020 
04021                                     /* If exceeds best match so far, save it.  */
04022                                     if (!best_regs_set || best_match_p)
04023                                         {
04024                                             best_regs_set = true;
04025                                             match_end = d;
04026 
04027                                             DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
04028 
04029                                             for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04030                                                 {
04031                                                     best_regstart[mcnt] = regstart[mcnt];
04032                                                     best_regend[mcnt] = regend[mcnt];
04033                                                 }
04034                                         }
04035                                     goto fail;
04036                                 }
04037 
04038                             /* If no failure points, don't restore garbage.  And if
04039                                last match is real best match, don't restore second
04040                                best one. */
04041                             else if (best_regs_set && !best_match_p)
04042                                 {
04043                                 restore_best_regs:
04044                                     /* Restore best match.  It may happen that `dend ==
04045                                        end_match_1' while the restored d is in string2.
04046                                        For example, the pattern `x.*y.*z' against the
04047                                        strings `x-' and `y-z-', if the two strings are
04048                                        not consecutive in memory.  */
04049                                     DEBUG_PRINT1 ("Restoring best registers.\n");
04050 
04051                                     d = match_end;
04052                                     dend = ((d >= string1 && d <= end1)
04053                                             ? end_match_1 : end_match_2);
04054 
04055                                     for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04056                                         {
04057                                             regstart[mcnt] = best_regstart[mcnt];
04058                                             regend[mcnt] = best_regend[mcnt];
04059                                         }
04060                                 }
04061                         } /* d != end_match_2 */
04062 
04063                 succeed_label:
04064                     DEBUG_PRINT1 ("Accepting match.\n");
04065 
04066                     /* If caller wants register contents data back, do it.  */
04067                     if (regs && !bufp->no_sub)
04068                         {
04069                             /* Have the register data arrays been allocated?  */
04070                             if (bufp->regs_allocated == REGS_UNALLOCATED)
04071                                 { /* No.  So allocate them with malloc.  We need one
04072                                      extra element beyond `num_regs' for the `-1' marker
04073                                      GNU code uses.  */
04074                                     regs->num_regs = MAX (RE_NREGS, num_regs + 1);
04075                                     regs->start = TALLOC (regs->num_regs, regoff_t);
04076                                     regs->end = TALLOC (regs->num_regs, regoff_t);
04077                                     if (regs->start == NULL || regs->end == NULL)
04078                                         {
04079                                             FREE_VARIABLES ();
04080                                             return -2;
04081                                         }
04082                                     bufp->regs_allocated = REGS_REALLOCATE;
04083                                 }
04084                             else if (bufp->regs_allocated == REGS_REALLOCATE)
04085                                 { /* Yes.  If we need more elements than were already
04086                                      allocated, reallocate them.  If we need fewer, just
04087                                      leave it alone.  */
04088                                     if (regs->num_regs < num_regs + 1)
04089                                         {
04090                                             regs->num_regs = num_regs + 1;
04091                                             RETALLOC (regs->start, regs->num_regs, regoff_t);
04092                                             RETALLOC (regs->end, regs->num_regs, regoff_t);
04093                                             if (regs->start == NULL || regs->end == NULL)
04094                                                 {
04095                                                     FREE_VARIABLES ();
04096                                                     return -2;
04097                                                 }
04098                                         }
04099                                 }
04100                             else
04101                                 {
04102                                     /* These braces fend off a "empty body in an else-statement"
04103                                        warning under GCC when assert expands to nothing.  */
04104                                     assert (bufp->regs_allocated == REGS_FIXED);
04105                                 }
04106 
04107                             /* Convert the pointer data in `regstart' and `regend' to
04108                                indices.  Register zero has to be set differently,
04109                                since we haven't kept track of any info for it.  */
04110                             if (regs->num_regs > 0)
04111                                 {
04112                                     regs->start[0] = pos;
04113                                     regs->end[0] = (MATCHING_IN_FIRST_STRING
04114                                                     ? ((regoff_t) (d - string1))
04115                                                     : ((regoff_t) (d - string2 + size1)));
04116                                 }
04117 
04118                             /* Go through the first `min (num_regs, regs->num_regs)'
04119                                registers, since that is all we initialized.  */
04120                             for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
04121                                  mcnt++)
04122                                 {
04123                                     if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
04124                                         regs->start[mcnt] = regs->end[mcnt] = -1;
04125                                     else
04126                                         {
04127                                             regs->start[mcnt]
04128                                                 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
04129                                             regs->end[mcnt]
04130                                                 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
04131                                         }
04132                                 }
04133 
04134                             /* If the regs structure we return has more elements than
04135                                were in the pattern, set the extra elements to -1.  If
04136                                we (re)allocated the registers, this is the case,
04137                                because we always allocate enough to have at least one
04138                                -1 at the end.  */
04139                             for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
04140                                 regs->start[mcnt] = regs->end[mcnt] = -1;
04141                         } /* regs && !bufp->no_sub */
04142 
04143                     DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
04144                                   nfailure_points_pushed, nfailure_points_popped,
04145                                   nfailure_points_pushed - nfailure_points_popped);
04146                     DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
04147 
04148                     mcnt = d - pos - (MATCHING_IN_FIRST_STRING
04149                                       ? string1
04150                                       : string2 - size1);
04151 
04152                     DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
04153 
04154                     FREE_VARIABLES ();
04155                     return mcnt;
04156                 }
04157 
04158             /* Otherwise match next pattern command.  */
04159             switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
04160                 {
04161                     /* Ignore these.  Used to ignore the n of succeed_n's which
04162                        currently have n == 0.  */
04163                 case no_op:
04164                     DEBUG_PRINT1 ("EXECUTING no_op.\n");
04165                     break;
04166 
04167                 case succeed:
04168                     DEBUG_PRINT1 ("EXECUTING succeed.\n");
04169                     goto succeed_label;
04170 
04171                     /* Match the next n pattern characters exactly.  The following
04172                        byte in the pattern defines n, and the n bytes after that
04173                        are the characters to match.  */
04174                 case exactn:
04175                     mcnt = *p++;
04176                     DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
04177 
04178                     /* This is written out as an if-else so we don't waste time
04179                        testing `translate' inside the loop.  */
04180                     if (translate)
04181                         {
04182                             do
04183                                 {
04184                                     PREFETCH ();
04185                                     if ((unsigned char) translate[(unsigned char) *d++]
04186                                         != (unsigned char) *p++)
04187                                         goto fail;
04188                                 }
04189                             while (--mcnt);
04190                         }
04191                     else
04192                         {
04193                             do
04194                                 {
04195                                     PREFETCH ();
04196                                     if (*d++ != (char) *p++) goto fail;
04197                                 }
04198                             while (--mcnt);
04199                         }
04200                     SET_REGS_MATCHED ();
04201                     break;
04202 
04203 
04204                     /* Match any character except possibly a newline or a null.  */
04205                 case anychar:
04206                     DEBUG_PRINT1 ("EXECUTING anychar.\n");
04207 
04208                     PREFETCH ();
04209 
04210                     if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
04211                         || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
04212                         goto fail;
04213 
04214                     SET_REGS_MATCHED ();
04215                     DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
04216                     d++;
04217                     break;
04218 
04219 
04220                 case charset:
04221                 case charset_not:
04222                     {
04223                         register unsigned char c;
04224                         boolean not = (re_opcode_t) *(p - 1) == charset_not;
04225 
04226                         DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
04227 
04228                         PREFETCH ();
04229                         c = TRANSLATE (*d); /* The character to match.  */
04230 
04231                         /* Cast to `unsigned' instead of `unsigned char' in case the
04232                            bit list is a full 32 bytes long.  */
04233                         if (c < (unsigned) (*p * BYTEWIDTH)
04234                             && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04235                             not = !not;
04236 
04237                         p += 1 + *p;
04238 
04239                         if (!not) goto fail;
04240 
04241                         SET_REGS_MATCHED ();
04242                         d++;
04243                         break;
04244                     }
04245 
04246 
04247                     /* The beginning of a group is represented by start_memory.
04248                        The arguments are the register number in the next byte, and the
04249                        number of groups inner to this one in the next.  The text
04250                        matched within the group is recorded (in the internal
04251                        registers data structure) under the register number.  */
04252                 case start_memory:
04253                     DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
04254 
04255                     /* Find out if this group can match the empty string.  */
04256                     p1 = p;             /* To send to group_match_null_string_p.  */
04257 
04258                     if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
04259                         REG_MATCH_NULL_STRING_P (reg_info[*p])
04260                             = group_match_null_string_p (&p1, pend, reg_info);
04261 
04262                     /* Save the position in the string where we were the last time
04263                        we were at this open-group operator in case the group is
04264                        operated upon by a repetition operator, e.g., with `(a*)*b'
04265                        against `ab'; then we want to ignore where we are now in
04266                        the string in case this attempt to match fails.  */
04267                     old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04268                         ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
04269                         : regstart[*p];
04270                     DEBUG_PRINT2 ("  old_regstart: %d\n",
04271                                   POINTER_TO_OFFSET (old_regstart[*p]));
04272 
04273                     regstart[*p] = d;
04274                     DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
04275 
04276                     IS_ACTIVE (reg_info[*p]) = 1;
04277                     MATCHED_SOMETHING (reg_info[*p]) = 0;
04278 
04279                     /* Clear this whenever we change the register activity status.  */
04280                     set_regs_matched_done = 0;
04281 
04282                     /* This is the new highest active register.  */
04283                     highest_active_reg = *p;
04284 
04285                     /* If nothing was active before, this is the new lowest active
04286                        register.  */
04287                     if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04288                         lowest_active_reg = *p;
04289 
04290                     /* Move past the register number and inner group count.  */
04291                     p += 2;
04292                     just_past_start_mem = p;
04293 
04294                     break;
04295 
04296 
04297                     /* The stop_memory opcode represents the end of a group.  Its
04298                        arguments are the same as start_memory's: the register
04299                        number, and the number of inner groups.  */
04300                 case stop_memory:
04301                     DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
04302 
04303                     /* We need to save the string position the last time we were at
04304                        this close-group operator in case the group is operated
04305                        upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
04306                        against `aba'; then we want to ignore where we are now in
04307                        the string in case this attempt to match fails.  */
04308                     old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04309                         ? REG_UNSET (regend[*p]) ? d : regend[*p]
04310                         : regend[*p];
04311                     DEBUG_PRINT2 ("      old_regend: %d\n",
04312                                   POINTER_TO_OFFSET (old_regend[*p]));
04313 
04314                     regend[*p] = d;
04315                     DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
04316 
04317                     /* This register isn't active anymore.  */
04318                     IS_ACTIVE (reg_info[*p]) = 0;
04319 
04320                     /* Clear this whenever we change the register activity status.  */
04321                     set_regs_matched_done = 0;
04322 
04323                     /* If this was the only register active, nothing is active
04324                        anymore.  */
04325                     if (lowest_active_reg == highest_active_reg)
04326                         {
04327                             lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04328                             highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04329                         }
04330                     else
04331                         { /* We must scan for the new highest active register, since
04332                              it isn't necessarily one less than now: consider
04333                              (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
04334                              new highest active register is 1.  */
04335                             unsigned char r = *p - 1;
04336                             while (r > 0 && !IS_ACTIVE (reg_info[r]))
04337                                 r--;
04338 
04339                             /* If we end up at register zero, that means that we saved
04340                                the registers as the result of an `on_failure_jump', not
04341                                a `start_memory', and we jumped to past the innermost
04342                                `stop_memory'.  For example, in ((.)*) we save
04343                                registers 1 and 2 as a result of the *, but when we pop
04344                                back to the second ), we are at the stop_memory 1.
04345                                Thus, nothing is active.  */
04346                             if (r == 0)
04347                                 {
04348                                     lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04349                                     highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04350                                 }
04351                             else
04352                                 highest_active_reg = r;
04353                         }
04354 
04355                     /* If just failed to match something this time around with a
04356                        group that's operated on by a repetition operator, try to
04357                        force exit from the ``loop'', and restore the register
04358                        information for this group that we had before trying this
04359                        last match.  */
04360                     if ((!MATCHED_SOMETHING (reg_info[*p])
04361                          || just_past_start_mem == p - 1)
04362                         && (p + 2) < pend)
04363                         {
04364                             boolean is_a_jump_n = false;
04365 
04366                             p1 = p + 2;
04367                             mcnt = 0;
04368                             switch ((re_opcode_t) *p1++)
04369                                 {
04370                                 case jump_n:
04371                                     is_a_jump_n = true;
04372                                 case pop_failure_jump:
04373                                 case maybe_pop_jump:
04374                                 case jump:
04375                                 case dummy_failure_jump:
04376                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04377                                     if (is_a_jump_n)
04378                                         p1 += 2;
04379                                     break;
04380 
04381                                 default:
04382                                     /* do nothing */ ;
04383                                 }
04384                             p1 += mcnt;
04385 
04386                             /* If the next operation is a jump backwards in the pattern
04387                                to an on_failure_jump right before the start_memory
04388                                corresponding to this stop_memory, exit from the loop
04389                                by forcing a failure after pushing on the stack the
04390                                on_failure_jump's jump in the pattern, and d.  */
04391                             if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
04392                                 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
04393                                 {
04394                                     /* If this group ever matched anything, then restore
04395                                        what its registers were before trying this last
04396                                        failed match, e.g., with `(a*)*b' against `ab' for
04397                                        regstart[1], and, e.g., with `((a*)*(b*)*)*'
04398                                        against `aba' for regend[3].
04399 
04400                                        Also restore the registers for inner groups for,
04401                                        e.g., `((a*)(b*))*' against `aba' (register 3 would
04402                                        otherwise get trashed).  */
04403 
04404                                     if (EVER_MATCHED_SOMETHING (reg_info[*p]))
04405                                         {
04406                                             unsigned r;
04407 
04408                                             EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
04409 
04410                                             /* Restore this and inner groups' (if any) registers.  */
04411                                             for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
04412                                                  r++)
04413                                                 {
04414                                                     regstart[r] = old_regstart[r];
04415 
04416                                                     /* xx why this test?  */
04417                                                     if (old_regend[r] >= regstart[r])
04418                                                         regend[r] = old_regend[r];
04419                                                 }
04420                                         }
04421                                     p1++;
04422                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04423                                     PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
04424 
04425                                     goto fail;
04426                                 }
04427                         }
04428 
04429                     /* Move past the register number and the inner group count.  */
04430                     p += 2;
04431                     break;
04432 
04433 
04434                     /* <digit> has been turned into a `duplicate' command which is
04435                        followed by the numeric value of <digit> as the register number.  */
04436                 case duplicate:
04437                     {
04438                         register const char *d2, *dend2;
04439                         int regno = *p++;   /* Get which register to match against.  */
04440                         DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
04441 
04442                         /* Can't back reference a group which we've never matched.  */
04443                         if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
04444                             goto fail;
04445 
04446                         /* Where in input to try to start matching.  */
04447                         d2 = regstart[regno];
04448 
04449                         /* Where to stop matching; if both the place to start and
04450                            the place to stop matching are in the same string, then
04451                            set to the place to stop, otherwise, for now have to use
04452                            the end of the first string.  */
04453 
04454                         dend2 = ((FIRST_STRING_P (regstart[regno])
04455                                   == FIRST_STRING_P (regend[regno]))
04456                                  ? regend[regno] : end_match_1);
04457                         for (;;)
04458                             {
04459                                 /* If necessary, advance to next segment in register
04460                                    contents.  */
04461                                 while (d2 == dend2)
04462                                     {
04463                                         if (dend2 == end_match_2) break;
04464                                         if (dend2 == regend[regno]) break;
04465 
04466                                         /* End of string1 => advance to string2. */
04467                                         d2 = string2;
04468                                         dend2 = regend[regno];
04469                                     }
04470                                 /* At end of register contents => success */
04471                                 if (d2 == dend2) break;
04472 
04473                                 /* If necessary, advance to next segment in data.  */
04474                                 PREFETCH ();
04475 
04476                                 /* How many characters left in this segment to match.  */
04477                                 mcnt = dend - d;
04478 
04479                                 /* Want how many consecutive characters we can match in
04480                                    one shot, so, if necessary, adjust the count.  */
04481                                 if (mcnt > dend2 - d2)
04482                                     mcnt = dend2 - d2;
04483 
04484                                 /* Compare that many; failure if mismatch, else move
04485                                    past them.  */
04486                                 if (translate
04487                                     ? bcmp_translate (d, d2, mcnt, translate)
04488                                     : bcmp (d, d2, mcnt))
04489                                     goto fail;
04490                                 d += mcnt, d2 += mcnt;
04491 
04492                                 /* Do this because we've match some characters.  */
04493                                 SET_REGS_MATCHED ();
04494                             }
04495                     }
04496                     break;
04497 
04498 
04499                     /* begline matches the empty string at the beginning of the string
04500                        (unless `not_bol' is set in `bufp'), and, if
04501                        `newline_anchor' is set, after newlines.  */
04502                 case begline:
04503                     DEBUG_PRINT1 ("EXECUTING begline.\n");
04504 
04505                     if (AT_STRINGS_BEG (d))
04506                         {
04507                             if (!bufp->not_bol) break;
04508                         }
04509                     else if (d[-1] == '\n' && bufp->newline_anchor)
04510                         {
04511                             break;
04512                         }
04513                     /* In all other cases, we fail.  */
04514                     goto fail;
04515 
04516 
04517                     /* endline is the dual of begline.  */
04518                 case endline:
04519                     DEBUG_PRINT1 ("EXECUTING endline.\n");
04520 
04521                     if (AT_STRINGS_END (d))
04522                         {
04523                             if (!bufp->not_eol) break;
04524                         }
04525 
04526                     /* We have to ``prefetch'' the next character.  */
04527                     else if ((d == end1 ? *string2 : *d) == '\n'
04528                              && bufp->newline_anchor)
04529                         {
04530                             break;
04531                         }
04532                     goto fail;
04533 
04534 
04535                     /* Match at the very beginning of the data.  */
04536                 case begbuf:
04537                     DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04538                     if (AT_STRINGS_BEG (d))
04539                         break;
04540                     goto fail;
04541 
04542 
04543                     /* Match at the very end of the data.  */
04544                 case endbuf:
04545                     DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04546                     if (AT_STRINGS_END (d))
04547                         break;
04548                     goto fail;
04549 
04550 
04551                     /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
04552                        pushes NULL as the value for the string on the stack.  Then
04553                        `pop_failure_point' will keep the current value for the
04554                        string, instead of restoring it.  To see why, consider
04555                        matching `foo\nbar' against `.*\n'.  The .* matches the foo;
04556                        then the . fails against the \n.  But the next thing we want
04557                        to do is match the \n against the \n; if we restored the
04558                        string value, we would be back at the foo.
04559 
04560                        Because this is used only in specific cases, we don't need to
04561                        check all the things that `on_failure_jump' does, to make
04562                        sure the right things get saved on the stack.  Hence we don't
04563                        share its code.  The only reason to push anything on the
04564                        stack at all is that otherwise we would have to change
04565                        `anychar's code to do something besides goto fail in this
04566                        case; that seems worse than this.  */
04567                 case on_failure_keep_string_jump:
04568                     DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04569 
04570                     EXTRACT_NUMBER_AND_INCR (mcnt, p);
04571 #ifdef _LIBC
04572                     DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
04573 #else
04574                     DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04575 #endif
04576 
04577                     PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04578                     break;
04579 
04580 
04581                     /* Uses of on_failure_jump:
04582 
04583                        Each alternative starts with an on_failure_jump that points
04584                        to the beginning of the next alternative.  Each alternative
04585                        except the last ends with a jump that in effect jumps past
04586                        the rest of the alternatives.  (They really jump to the
04587                        ending jump of the following alternative, because tensioning
04588                        these jumps is a hassle.)
04589 
04590                        Repeats start with an on_failure_jump that points past both
04591                        the repetition text and either the following jump or
04592                        pop_failure_jump back to this on_failure_jump.  */
04593                 case on_failure_jump:
04594                 on_failure:
04595                 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04596 
04597                 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04598 #ifdef _LIBC
04599                 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
04600 #else
04601                 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04602 #endif
04603 
04604                 /* If this on_failure_jump comes right before a group (i.e.,
04605                    the original * applied to a group), save the information
04606                    for that group and all inner ones, so that if we fail back
04607                    to this point, the group's information will be correct.
04608                    For example, in \(a*\)*\1, we need the preceding group,
04609                    and in \(zz\(a*\)b*\)\2, we need the inner group.  */
04610 
04611                 /* We can't use `p' to check ahead because we push
04612                    a failure point to `p + mcnt' after we do this.  */
04613                 p1 = p;
04614 
04615                 /* We need to skip no_op's before we look for the
04616                    start_memory in case this on_failure_jump is happening as
04617                    the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
04618                    against aba.  */
04619                 while (p1 < pend && (re_opcode_t) *p1 == no_op)
04620                     p1++;
04621 
04622                 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04623                     {
04624                         /* We have a new highest active register now.  This will
04625                            get reset at the start_memory we are about to get to,
04626                            but we will have saved all the registers relevant to
04627                            this repetition op, as described above.  */
04628                         highest_active_reg = *(p1 + 1) + *(p1 + 2);
04629                         if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04630                             lowest_active_reg = *(p1 + 1);
04631                     }
04632 
04633                 DEBUG_PRINT1 (":\n");
04634                 PUSH_FAILURE_POINT (p + mcnt, d, -2);
04635                 break;
04636 
04637 
04638                 /* A smart repeat ends with `maybe_pop_jump'.
04639                    We change it to either `pop_failure_jump' or `jump'.  */
04640                 case maybe_pop_jump:
04641                     EXTRACT_NUMBER_AND_INCR (mcnt, p);
04642                     DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04643                     {
04644                         register unsigned char *p2 = p;
04645 
04646                         /* Compare the beginning of the repeat with what in the
04647                            pattern follows its end. If we can establish that there
04648                            is nothing that they would both match, i.e., that we
04649                            would have to backtrack because of (as in, e.g., `a*a')
04650                            then we can change to pop_failure_jump, because we'll
04651                            never have to backtrack.
04652 
04653                            This is not true in the case of alternatives: in
04654                            `(a|ab)*' we do need to backtrack to the `ab' alternative
04655                            (e.g., if the string was `ab').  But instead of trying to
04656                            detect that here, the alternative has put on a dummy
04657                            failure point which is what we will end up popping.  */
04658 
04659                         /* Skip over open/close-group commands.
04660                            If what follows this loop is a ...+ construct,
04661                            look at what begins its body, since we will have to
04662                            match at least one of that.  */
04663                         while (1)
04664                             {
04665                                 if (p2 + 2 < pend
04666                                     && ((re_opcode_t) *p2 == stop_memory
04667                                         || (re_opcode_t) *p2 == start_memory))
04668                                     p2 += 3;
04669                                 else if (p2 + 6 < pend
04670                                          && (re_opcode_t) *p2 == dummy_failure_jump)
04671                                     p2 += 6;
04672                                 else
04673                                     break;
04674                             }
04675 
04676                         p1 = p + mcnt;
04677                         /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
04678                            to the `maybe_finalize_jump' of this case.  Examine what
04679                            follows.  */
04680 
04681                         /* If we're at the end of the pattern, we can change.  */
04682                         if (p2 == pend)
04683                             {
04684                                 /* Consider what happens when matching ":\(.*\)"
04685                                    against ":/".  I don't really understand this code
04686                                    yet.  */
04687                                 p[-3] = (unsigned char) pop_failure_jump;
04688                                 DEBUG_PRINT1
04689                                     ("  End of pattern: change to `pop_failure_jump'.\n");
04690                             }
04691 
04692                         else if ((re_opcode_t) *p2 == exactn
04693                                  || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04694                             {
04695                                 register unsigned char c
04696                                     = *p2 == (unsigned char) endline ? '\n' : p2[2];
04697 
04698                                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04699                                     {
04700                                         p[-3] = (unsigned char) pop_failure_jump;
04701                                         DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04702                                                       c, p1[5]);
04703                                     }
04704 
04705                                 else if ((re_opcode_t) p1[3] == charset
04706                                          || (re_opcode_t) p1[3] == charset_not)
04707                                     {
04708                                         int not = (re_opcode_t) p1[3] == charset_not;
04709 
04710                                         if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04711                                             && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04712                                             not = !not;
04713 
04714                                         /* `not' is equal to 1 if c would match, which means
04715                                            that we can't change to pop_failure_jump.  */
04716                                         if (!not)
04717                                             {
04718                                                 p[-3] = (unsigned char) pop_failure_jump;
04719                                                 DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04720                                             }
04721                                     }
04722                             }
04723                         else if ((re_opcode_t) *p2 == charset)
04724                             {
04725 #ifdef DEBUG
04726                                 register unsigned char c
04727                                     = *p2 == (unsigned char) endline ? '\n' : p2[2];
04728 #endif
04729 
04730 #if 0
04731                                 if ((re_opcode_t) p1[3] == exactn
04732                                     && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
04733                                           && (p2[2 + p1[5] / BYTEWIDTH]
04734                                               & (1 << (p1[5] % BYTEWIDTH)))))
04735 #else
04736                                     if ((re_opcode_t) p1[3] == exactn
04737                                         && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
04738                                               && (p2[2 + p1[4] / BYTEWIDTH]
04739                                                   & (1 << (p1[4] % BYTEWIDTH)))))
04740 #endif
04741                                         {
04742                                             p[-3] = (unsigned char) pop_failure_jump;
04743                                             DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
04744                                                           c, p1[5]);
04745                                         }
04746 
04747                                     else if ((re_opcode_t) p1[3] == charset_not)
04748                                         {
04749                                             int idx;
04750                                             /* We win if the charset_not inside the loop
04751                                                lists every character listed in the charset after.  */
04752                                             for (idx = 0; idx < (int) p2[1]; idx++)
04753                                                 if (! (p2[2 + idx] == 0
04754                                                        || (idx < (int) p1[4]
04755                                                            && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
04756                                                     break;
04757 
04758                                             if (idx == p2[1])
04759                                                 {
04760                                                     p[-3] = (unsigned char) pop_failure_jump;
04761                                                     DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04762                                                 }
04763                                         }
04764                                     else if ((re_opcode_t) p1[3] == charset)
04765                                         {
04766                                             int idx;
04767                                             /* We win if the charset inside the loop
04768                                                has no overlap with the one after the loop.  */
04769                                             for (idx = 0;
04770                                                  idx < (int) p2[1] && idx < (int) p1[4];
04771                                                  idx++)
04772                                                 if ((p2[2 + idx] & p1[5 + idx]) != 0)
04773                                                     break;
04774 
04775                                             if (idx == p2[1] || idx == p1[4])
04776                                                 {
04777                                                     p[-3] = (unsigned char) pop_failure_jump;
04778                                                     DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
04779                                                 }
04780                                         }
04781                             }
04782                     }
04783                     p -= 2;             /* Point at relative address again.  */
04784                     if ((re_opcode_t) p[-1] != pop_failure_jump)
04785                         {
04786                             p[-1] = (unsigned char) jump;
04787                             DEBUG_PRINT1 ("  Match => jump.\n");
04788                             goto unconditional_jump;
04789                         }
04790                     /* Note fall through.  */
04791 
04792 
04793                     /* The end of a simple repeat has a pop_failure_jump back to
04794                        its matching on_failure_jump, where the latter will push a
04795                        failure point.  The pop_failure_jump takes off failure
04796                        points put on by this pop_failure_jump's matching
04797                        on_failure_jump; we got through the pattern to here from the
04798                        matching on_failure_jump, so didn't fail.  */
04799                 case pop_failure_jump:
04800                     {
04801                         /* We need to pass separate storage for the lowest and
04802                            highest registers, even though we don't care about the
04803                            actual values.  Otherwise, we will restore only one
04804                            register from the stack, since lowest will == highest in
04805                            `pop_failure_point'.  */
04806                         active_reg_t dummy_low_reg, dummy_high_reg;
04807                         unsigned char *pdummy;
04808                         const char *sdummy;
04809 
04810                         DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04811                         POP_FAILURE_POINT (sdummy, pdummy,
04812                                            dummy_low_reg, dummy_high_reg,
04813                                            reg_dummy, reg_dummy, reg_info_dummy);
04814                     }
04815                     /* Note fall through.  */
04816 
04817                 unconditional_jump:
04818 #ifdef _LIBC
04819                 DEBUG_PRINT2 ("\n%p: ", p);
04820 #else
04821                 DEBUG_PRINT2 ("\n0x%x: ", p);
04822 #endif
04823                 /* Note fall through.  */
04824 
04825                 /* Unconditionally jump (without popping any failure points).  */
04826                 case jump:
04827                     EXTRACT_NUMBER_AND_INCR (mcnt, p);  /* Get the amount to jump.  */
04828                     DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04829                     p += mcnt;                          /* Do the jump.  */
04830 #ifdef _LIBC
04831                     DEBUG_PRINT2 ("(to %p).\n", p);
04832 #else
04833                     DEBUG_PRINT2 ("(to 0x%x).\n", p);
04834 #endif
04835                     break;
04836 
04837 
04838                     /* We need this opcode so we can detect where alternatives end
04839                        in `group_match_null_string_p' et al.  */
04840                 case jump_past_alt:
04841                     DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04842                     goto unconditional_jump;
04843 
04844 
04845                     /* Normally, the on_failure_jump pushes a failure point, which
04846                        then gets popped at pop_failure_jump.  We will end up at
04847                        pop_failure_jump, also, and with a pattern of, say, `a+', we
04848                        are skipping over the on_failure_jump, so we have to push
04849                        something meaningless for pop_failure_jump to pop.  */
04850                 case dummy_failure_jump:
04851                     DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04852                     /* It doesn't matter what we push for the string here.  What
04853                        the code at `fail' tests is the value for the pattern.  */
04854                     PUSH_FAILURE_POINT (0, 0, -2);
04855                     goto unconditional_jump;
04856 
04857 
04858                     /* At the end of an alternative, we need to push a dummy failure
04859                        point in case we are followed by a `pop_failure_jump', because
04860                        we don't want the failure point for the alternative to be
04861                        popped.  For example, matching `(a|ab)*' against `aab'
04862                        requires that we match the `ab' alternative.  */
04863                 case push_dummy_failure:
04864                     DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04865                     /* See comments just above at `dummy_failure_jump' about the
04866                        two zeroes.  */
04867                     PUSH_FAILURE_POINT (0, 0, -2);
04868                     break;
04869 
04870                     /* Have to succeed matching what follows at least n times.
04871                        After that, handle like `on_failure_jump'.  */
04872                 case succeed_n:
04873                     EXTRACT_NUMBER (mcnt, p + 2);
04874                     DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04875 
04876                     assert (mcnt >= 0);
04877                     /* Originally, this is how many times we HAVE to succeed.  */
04878                     if (mcnt > 0)
04879                         {
04880                             mcnt--;
04881                             p += 2;
04882                             STORE_NUMBER_AND_INCR (p, mcnt);
04883 #ifdef _LIBC
04884                             DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
04885 #else
04886                             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
04887 #endif
04888                         }
04889                     else if (mcnt == 0)
04890                         {
04891 #ifdef _LIBC
04892                             DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
04893 #else
04894                             DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
04895 #endif
04896                             p[2] = (unsigned char) no_op;
04897                             p[3] = (unsigned char) no_op;
04898                             goto on_failure;
04899                         }
04900                     break;
04901 
04902                 case jump_n:
04903                     EXTRACT_NUMBER (mcnt, p + 2);
04904                     DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04905 
04906                     /* Originally, this is how many times we CAN jump.  */
04907                     if (mcnt)
04908                         {
04909                             mcnt--;
04910                             STORE_NUMBER (p + 2, mcnt);
04911 #ifdef _LIBC
04912                             DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
04913 #else
04914                             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
04915 #endif
04916                             goto unconditional_jump;
04917                         }
04918                     /* If don't have to jump any more, skip over the rest of command.  */
04919                     else
04920                         p += 4;
04921                     break;
04922 
04923                 case set_number_at:
04924                     {
04925                         DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04926 
04927                         EXTRACT_NUMBER_AND_INCR (mcnt, p);
04928                         p1 = p + mcnt;
04929                         EXTRACT_NUMBER_AND_INCR (mcnt, p);
04930 #ifdef _LIBC
04931                         DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
04932 #else
04933                         DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
04934 #endif
04935                         STORE_NUMBER (p1, mcnt);
04936                         break;
04937                     }
04938 
04939 #if 0
04940                     /* The DEC Alpha C compiler 3.x generates incorrect code for the
04941                        test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
04942                        AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
04943                        macro and introducing temporary variables works around the bug.  */
04944 
04945                 case wordbound:
04946                     DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04947                     if (AT_WORD_BOUNDARY (d))
04948                         break;
04949                     goto fail;
04950 
04951                 case notwordbound:
04952                     DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04953                     if (AT_WORD_BOUNDARY (d))
04954                         goto fail;
04955                     break;
04956 #else
04957                 case wordbound:
04958                     {
04959                         boolean prevchar, thischar;
04960 
04961                         DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04962                         if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04963                             break;
04964 
04965                         prevchar = WORDCHAR_P (d - 1);
04966                         thischar = WORDCHAR_P (d);
04967                         if (prevchar != thischar)
04968                             break;
04969                         goto fail;
04970                     }
04971 
04972                 case notwordbound:
04973                     {
04974                         boolean prevchar, thischar;
04975 
04976                         DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04977                         if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04978                             goto fail;
04979 
04980                         prevchar = WORDCHAR_P (d - 1);
04981                         thischar = WORDCHAR_P (d);
04982                         if (prevchar != thischar)
04983                             goto fail;
04984                         break;
04985                     }
04986 #endif
04987 
04988                 case wordbeg:
04989                     DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04990                     if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04991                         break;
04992                     goto fail;
04993 
04994                 case wordend:
04995                     DEBUG_PRINT1 ("EXECUTING wordend.\n");
04996                     if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04997                         && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04998                         break;
04999                     goto fail;
05000 
05001 #ifdef emacs
05002                 case before_dot:
05003                     DEBUG_PRINT1 ("EXECUTING before_dot.\n");
05004                     if (PTR_CHAR_POS ((unsigned char *) d) >= point)
05005                         goto fail;
05006                     break;
05007 
05008                 case at_dot:
05009                     DEBUG_PRINT1 ("EXECUTING at_dot.\n");
05010                     if (PTR_CHAR_POS ((unsigned char *) d) != point)
05011                         goto fail;
05012                     break;
05013 
05014                 case after_dot:
05015                     DEBUG_PRINT1 ("EXECUTING after_dot.\n");
05016                     if (PTR_CHAR_POS ((unsigned char *) d) <= point)
05017                         goto fail;
05018                     break;
05019 
05020                 case syntaxspec:
05021                     DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
05022                     mcnt = *p++;
05023                     goto matchsyntax;
05024 
05025                 case wordchar:
05026                     DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
05027                     mcnt = (int) Sword;
05028                 matchsyntax:
05029                     PREFETCH ();
05030                     /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
05031                     d++;
05032                     if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
05033                         goto fail;
05034                     SET_REGS_MATCHED ();
05035                     break;
05036 
05037                 case notsyntaxspec:
05038                     DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
05039                     mcnt = *p++;
05040                     goto matchnotsyntax;
05041 
05042                 case notwordchar:
05043                     DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
05044                     mcnt = (int) Sword;
05045                 matchnotsyntax:
05046                     PREFETCH ();
05047                     /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
05048                     d++;
05049                     if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
05050                         goto fail;
05051                     SET_REGS_MATCHED ();
05052                     break;
05053 
05054 #else /* not emacs */
05055                 case wordchar:
05056                     DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
05057                     PREFETCH ();
05058                     if (!WORDCHAR_P (d))
05059                         goto fail;
05060                     SET_REGS_MATCHED ();
05061                     d++;
05062                     break;
05063 
05064                 case notwordchar:
05065                     DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
05066                     PREFETCH ();
05067                     if (WORDCHAR_P (d))
05068                         goto fail;
05069                     SET_REGS_MATCHED ();
05070                     d++;
05071                     break;
05072 #endif /* not emacs */
05073 
05074                 default:
05075                     abort ();
05076                 }
05077             continue;  /* Successfully executed one pattern command; keep going.  */
05078 
05079 
05080             /* We goto here if a matching operation fails. */
05081         fail:
05082             if (!FAIL_STACK_EMPTY ())
05083                 { /* A restart point is known.  Restore to that state.  */
05084                     DEBUG_PRINT1 ("\nFAIL:\n");
05085                     POP_FAILURE_POINT (d, p,
05086                                        lowest_active_reg, highest_active_reg,
05087                                        regstart, regend, reg_info);
05088 
05089                     /* If this failure point is a dummy, try the next one.  */
05090                     if (!p)
05091                         goto fail;
05092 
05093                     /* If we failed to the end of the pattern, don't examine *p.  */
05094                     assert (p <= pend);
05095                     if (p < pend)
05096                         {
05097                             boolean is_a_jump_n = false;
05098 
05099                             /* If failed to a backwards jump that's part of a repetition
05100                                loop, need to pop this failure point and use the next one.  */
05101                             switch ((re_opcode_t) *p)
05102                                 {
05103                                 case jump_n:
05104                                     is_a_jump_n = true;
05105                                 case maybe_pop_jump:
05106                                 case pop_failure_jump:
05107                                 case jump:
05108                                     p1 = p + 1;
05109                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05110                                     p1 += mcnt;
05111 
05112                                     if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
05113                                         || (!is_a_jump_n
05114                                             && (re_opcode_t) *p1 == on_failure_jump))
05115                                         goto fail;
05116                                     break;
05117                                 default:
05118                                     /* do nothing */ ;
05119                                 }
05120                         }
05121 
05122                     if (d >= string1 && d <= end1)
05123                         dend = end_match_1;
05124                 }
05125             else
05126                 break;   /* Matching at this starting point really fails.  */
05127         } /* for (;;) */
05128 
05129     if (best_regs_set)
05130         goto restore_best_regs;
05131 
05132     FREE_VARIABLES ();
05133 
05134     return -1;                          /* Failure to match.  */
05135 } /* re_match_2 */
05136 
05137 /* Subroutine definitions for re_match_2.  */
05138 
05139 
05140 /* We are passed P pointing to a register number after a start_memory.
05141 
05142    Return true if the pattern up to the corresponding stop_memory can
05143    match the empty string, and false otherwise.
05144 
05145    If we find the matching stop_memory, sets P to point to one past its number.
05146    Otherwise, sets P to an undefined byte less than or equal to END.
05147 
05148    We don't handle duplicates properly (yet).  */
05149 
05150 static boolean
05151 group_match_null_string_p(unsigned char **p,
05152                           unsigned char *end,
05153                           register_info_type *reg_info)
05154 {
05155     int mcnt;
05156     /* Point to after the args to the start_memory.  */
05157     unsigned char *p1 = *p + 2;
05158 
05159     while (p1 < end)
05160         {
05161             /* Skip over opcodes that can match nothing, and return true or
05162                false, as appropriate, when we get to one that can't, or to the
05163                matching stop_memory.  */
05164 
05165             switch ((re_opcode_t) *p1)
05166                 {
05167                     /* Could be either a loop or a series of alternatives.  */
05168                 case on_failure_jump:
05169                     p1++;
05170                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05171 
05172                     /* If the next operation is not a jump backwards in the
05173                        pattern.  */
05174 
05175                     if (mcnt >= 0)
05176                         {
05177                             /* Go through the on_failure_jumps of the alternatives,
05178                                seeing if any of the alternatives cannot match nothing.
05179                                The last alternative starts with only a jump,
05180                                whereas the rest start with on_failure_jump and end
05181                                with a jump, e.g., here is the pattern for `a|b|c':
05182 
05183                                /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
05184                                /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
05185                                /exactn/1/c
05186 
05187                                So, we have to first go through the first (n-1)
05188                                alternatives and then deal with the last one separately.  */
05189 
05190 
05191                             /* Deal with the first (n-1) alternatives, which start
05192                                with an on_failure_jump (see above) that jumps to right
05193                                past a jump_past_alt.  */
05194 
05195                             while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
05196                                 {
05197                                     /* `mcnt' holds how many bytes long the alternative
05198                                        is, including the ending `jump_past_alt' and
05199                                        its number.  */
05200 
05201                                     if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
05202                                                                   reg_info))
05203                                         return false;
05204 
05205                                     /* Move to right after this alternative, including the
05206                                        jump_past_alt.  */
05207                                     p1 += mcnt;
05208 
05209                                     /* Break if it's the beginning of an n-th alternative
05210                                        that doesn't begin with an on_failure_jump.  */
05211                                     if ((re_opcode_t) *p1 != on_failure_jump)
05212                                         break;
05213 
05214                                     /* Still have to check that it's not an n-th
05215                                        alternative that starts with an on_failure_jump.  */
05216                                     p1++;
05217                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05218                                     if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
05219                                         {
05220                                             /* Get to the beginning of the n-th alternative.  */
05221                                             p1 -= 3;
05222                                             break;
05223                                         }
05224                                 }
05225 
05226                             /* Deal with the last alternative: go back and get number
05227                                of the `jump_past_alt' just before it.  `mcnt' contains
05228                                the length of the alternative.  */
05229                             EXTRACT_NUMBER (mcnt, p1 - 2);
05230 
05231                             if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
05232                                 return false;
05233 
05234                             p1 += mcnt; /* Get past the n-th alternative.  */
05235                         } /* if mcnt > 0 */
05236                     break;
05237 
05238 
05239                 case stop_memory:
05240                     assert (p1[1] == **p);
05241                     *p = p1 + 2;
05242                     return true;
05243 
05244 
05245                 default:
05246                     if (!common_op_match_null_string_p (&p1, end, reg_info))
05247                         return false;
05248                 }
05249         } /* while p1 < end */
05250 
05251     return false;
05252 } /* group_match_null_string_p */
05253 
05254 
05255 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
05256    It expects P to be the first byte of a single alternative and END one
05257    byte past the last. The alternative can contain groups.  */
05258 
05259 static boolean
05260 alt_match_null_string_p(unsigned char *p,
05261                         unsigned char *end,
05262                         register_info_type *reg_info)
05263 {
05264     int mcnt;
05265     unsigned char *p1 = p;
05266 
05267     while (p1 < end)
05268         {
05269             /* Skip over opcodes that can match nothing, and break when we get
05270                to one that can't.  */
05271 
05272             switch ((re_opcode_t) *p1)
05273                 {
05274                     /* It's a loop.  */
05275                 case on_failure_jump:
05276                     p1++;
05277                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05278                     p1 += mcnt;
05279                     break;
05280 
05281                 default:
05282                     if (!common_op_match_null_string_p (&p1, end, reg_info))
05283                         return false;
05284                 }
05285         }  /* while p1 < end */
05286 
05287     return true;
05288 } /* alt_match_null_string_p */
05289 
05290 
05291 /* Deals with the ops common to group_match_null_string_p and
05292    alt_match_null_string_p.
05293 
05294    Sets P to one after the op and its arguments, if any.  */
05295 
05296 static boolean
05297 common_op_match_null_string_p(unsigned char **p,
05298                               unsigned char *end,
05299                               register_info_type *reg_info)
05300 {
05301     int mcnt;
05302     boolean ret;
05303     int reg_no;
05304     unsigned char *p1 = *p;
05305 
05306     switch ((re_opcode_t) *p1++)
05307         {
05308         case no_op:
05309         case begline:
05310         case endline:
05311         case begbuf:
05312         case endbuf:
05313         case wordbeg:
05314         case wordend:
05315         case wordbound:
05316         case notwordbound:
05317 #ifdef emacs
05318         case before_dot:
05319         case at_dot:
05320         case after_dot:
05321 #endif
05322             break;
05323 
05324         case start_memory:
05325             reg_no = *p1;
05326             assert (reg_no > 0 && reg_no <= MAX_REGNUM);
05327             ret = group_match_null_string_p (&p1, end, reg_info);
05328 
05329             /* Have to set this here in case we're checking a group which
05330                contains a group and a back reference to it.  */
05331 
05332             if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
05333                 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
05334 
05335             if (!ret)
05336                 return false;
05337             break;
05338 
05339             /* If this is an optimized succeed_n for zero times, make the jump.  */
05340         case jump:
05341             EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05342             if (mcnt >= 0)
05343                 p1 += mcnt;
05344             else
05345                 return false;
05346             break;
05347 
05348         case succeed_n:
05349             /* Get to the number of times to succeed.  */
05350             p1 += 2;
05351             EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05352 
05353             if (mcnt == 0)
05354                 {
05355                     p1 -= 4;
05356                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05357                     p1 += mcnt;
05358                 }
05359             else
05360                 return false;
05361             break;
05362 
05363         case duplicate:
05364             if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
05365                 return false;
05366             break;
05367 
05368         case set_number_at:
05369             p1 += 4;
05370 
05371         default:
05372             /* All other opcodes mean we cannot match the empty string.  */
05373             return false;
05374         }
05375 
05376     *p = p1;
05377     return true;
05378 } /* common_op_match_null_string_p */
05379 
05380 
05381 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
05382    bytes; nonzero otherwise.  */
05383 
05384 static int
05385 bcmp_translate(const char *s1,
05386                const char *s2,
05387                register int len,
05388                RE_TRANSLATE_TYPE translate)
05389 {
05390     register const unsigned char *p1 = (const unsigned char *) s1;
05391     register const unsigned char *p2 = (const unsigned char *) s2;
05392     while (len)
05393         {
05394             if (translate[*p1++] != translate[*p2++]) return 1;
05395             len--;
05396         }
05397     return 0;
05398 }
05399 
05400 /* Entry points for GNU code.  */
05401 
05402 /* re_compile_pattern is the GNU regular expression compiler: it
05403    compiles PATTERN (of length SIZE) and puts the result in BUFP.
05404    Returns 0 if the pattern was valid, otherwise an error string.
05405 
05406    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
05407    are set in BUFP on entry.
05408 
05409    We call regex_compile to do the actual compilation.  */
05410 
05411 const char *
05412 re_compile_pattern(const char *pattern,
05413                    size_t length,
05414                    struct re_pattern_buffer *bufp)
05415 {
05416     reg_errcode_t ret;
05417 
05418     /* GNU code is written to assume at least RE_NREGS registers will be set
05419        (and at least one extra will be -1).  */
05420     bufp->regs_allocated = REGS_UNALLOCATED;
05421 
05422     /* And GNU code determines whether or not to get register information
05423        by passing null for the REGS argument to re_match, etc., not by
05424        setting no_sub.  */
05425     bufp->no_sub = 0;
05426 
05427     /* Match anchors at newline.  */
05428     bufp->newline_anchor = 1;
05429 
05430     ret = regex_compile (pattern, length, re_syntax_options, bufp);
05431 
05432     if (!ret)
05433         return NULL;
05434     return gettext (re_error_msgid[(int) ret]);
05435 }
05436 
05437 /* Entry points compatible with 4.2 BSD regex library.  We don't define
05438    them unless specifically requested.  */
05439 
05440 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
05441 
05442 /* BSD has one and only one pattern buffer.  */
05443 static struct re_pattern_buffer re_comp_buf;
05444 
05445 char *
05446 #ifdef _LIBC
05447 /* Make these definitions weak in libc, so POSIX programs can redefine
05448    these names if they don't use our functions, and still use
05449    regcomp/regexec below without link errors.  */
05450 weak_function
05451 #endif
05452 re_comp (s)
05453      const char *s;
05454 {
05455     reg_errcode_t ret;
05456 
05457     if (!s)
05458         {
05459             if (!re_comp_buf.buffer)
05460                 return gettext ("No previous regular expression");
05461             return 0;
05462         }
05463 
05464     if (!re_comp_buf.buffer)
05465         {
05466             re_comp_buf.buffer = (unsigned char *) malloc (200);
05467             if (re_comp_buf.buffer == NULL)
05468                 return gettext (re_error_msgid[(int) REG_ESPACE]);
05469             re_comp_buf.allocated = 200;
05470 
05471             re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
05472             if (re_comp_buf.fastmap == NULL)
05473                 return gettext (re_error_msgid[(int) REG_ESPACE]);
05474         }
05475 
05476     /* Since `re_exec' always passes NULL for the `regs' argument, we
05477        don't need to initialize the pattern buffer fields which affect it.  */
05478 
05479     /* Match anchors at newlines.  */
05480     re_comp_buf.newline_anchor = 1;
05481 
05482     ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
05483 
05484     if (!ret)
05485         return NULL;
05486 
05487     /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
05488     return (char *) gettext (re_error_msgid[(int) ret]);
05489 }
05490 
05491 
05492 int
05493 #ifdef _LIBC
05494 weak_function
05495 #endif
05496 re_exec (s)
05497      const char *s;
05498 {
05499     const int len = strlen (s);
05500     return
05501         0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
05502 }
05503 
05504 #endif /* _REGEX_RE_COMP */
05505 
05506 /* POSIX.2 functions.  Don't define these for Emacs.  */
05507 
05508 #ifndef emacs
05509 
05510 /* regcomp takes a regular expression as a string and compiles it.
05511 
05512    PREG is a regex_t *.  We do not expect any fields to be initialized,
05513    since POSIX says we shouldn't.  Thus, we set
05514 
05515      `buffer' to the compiled pattern;
05516      `used' to the length of the compiled pattern;
05517      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
05518        REG_EXTENDED bit in CFLAGS is set; otherwise, to
05519        RE_SYNTAX_POSIX_BASIC;
05520      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
05521      `fastmap' and `fastmap_accurate' to zero;
05522      `re_nsub' to the number of subexpressions in PATTERN.
05523 
05524    PATTERN is the address of the pattern string.
05525 
05526    CFLAGS is a series of bits which affect compilation.
05527 
05528      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
05529      use POSIX basic syntax.
05530 
05531      If REG_NEWLINE is set, then . and [^...] don't match newline.
05532      Also, regexec will try a match beginning after every newline.
05533 
05534      If REG_ICASE is set, then we considers upper- and lowercase
05535      versions of letters to be equivalent when matching.
05536 
05537      If REG_NOSUB is set, then when PREG is passed to regexec, that
05538      routine will report only success or failure, and nothing about the
05539      registers.
05540 
05541    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
05542    the return codes and their meanings.)  */
05543 
05544 int
05545 regcomp(regex_t *preg,
05546         const char *pattern,
05547         int cflags)
05548 {
05549     reg_errcode_t ret;
05550     reg_syntax_t syntax
05551         = (cflags & REG_EXTENDED) ?
05552         RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
05553 
05554 #ifdef DEBUG
05555         debug=0;
05556         DEBUG_PRINT1("EXECUTING regcomp");
05557      debug=0;   
05558 #endif
05559     /* regex_compile will allocate the space for the compiled pattern.  */
05560     preg->buffer = 0;
05561     preg->allocated = 0;
05562     preg->used = 0;
05563 
05564     /* Don't bother to use a fastmap when searching.  This simplifies the
05565        REG_NEWLINE case: if we used a fastmap, we'd have to put all the
05566        characters after newlines into the fastmap.  This way, we just try
05567        every character.  */
05568     preg->fastmap = 0;
05569 
05570     if (cflags & REG_ICASE)
05571         {
05572             unsigned i;
05573 
05574             preg->translate
05575                 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
05576                                               * sizeof (*(RE_TRANSLATE_TYPE)0));
05577             if (preg->translate == NULL)
05578                 return (int) REG_ESPACE;
05579 
05580             /* Map uppercase characters to corresponding lowercase ones.  */
05581             for (i = 0; i < CHAR_SET_SIZE; i++)
05582                 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
05583         }
05584     else
05585         preg->translate = NULL;
05586 
05587     /* If REG_NEWLINE is set, newlines are treated differently.  */
05588     if (cflags & REG_NEWLINE)
05589         { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
05590             syntax &= ~RE_DOT_NEWLINE;
05591             syntax |= RE_HAT_LISTS_NOT_NEWLINE;
05592             /* It also changes the matching behavior.  */
05593             preg->newline_anchor = 1;
05594         }
05595     else
05596         preg->newline_anchor = 0;
05597 
05598     preg->no_sub = !!(cflags & REG_NOSUB);
05599 
05600     /* POSIX says a null character in the pattern terminates it, so we
05601        can use strlen here in compiling the pattern.  */
05602     ret = regex_compile (pattern, strlen (pattern), syntax, preg);
05603 
05604     /* POSIX doesn't distinguish between an unmatched open-group and an
05605        unmatched close-group: both are REG_EPAREN.  */
05606     if (ret == REG_ERPAREN) ret = REG_EPAREN;
05607 
05608 //      printf("done with regcomp\n");
05609 
05610     return (int) ret;
05611 }
05612 
05613 
05614 /* regexec searches for a given pattern, specified by PREG, in the
05615    string STRING.
05616 
05617    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
05618    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
05619    least NMATCH elements, and we set them to the offsets of the
05620    corresponding matched substrings.
05621 
05622    EFLAGS specifies `execution flags' which affect matching: if
05623    REG_NOTBOL is set, then ^ does not match at the beginning of the
05624    string; if REG_NOTEOL is set, then $ does not match at the end.
05625 
05626    We return 0 if we find a match and REG_NOMATCH if not.  */
05627 
05628 int
05629 regexec(const regex_t *preg,
05630         const char *string,
05631         size_t nmatch,
05632         regmatch_t pmatch[],
05633         int eflags)
05634 {
05635     int ret;
05636     struct re_registers regs;
05637     regex_t private_preg;
05638     int len = strlen (string);
05639     boolean want_reg_info = !preg->no_sub && nmatch > 0;
05640 
05641     private_preg = *preg;
05642 
05643     private_preg.not_bol = !!(eflags & REG_NOTBOL);
05644     private_preg.not_eol = !!(eflags & REG_NOTEOL);
05645 
05646     /* The user has told us exactly how many registers to return
05647        information about, via `nmatch'.  We have to pass that on to the
05648        matching routines.  */
05649     private_preg.regs_allocated = REGS_FIXED;
05650 
05651     if (want_reg_info)
05652         {
05653             regs.num_regs = nmatch;
05654             regs.start = TALLOC (nmatch, regoff_t);
05655             regs.end = TALLOC (nmatch, regoff_t);
05656             if (regs.start == NULL || regs.end == NULL)
05657                 return (int) REG_NOMATCH;
05658         }
05659 
05660     /* Perform the searching operation.  */
05661     ret = re_search (&private_preg, string, len,
05662                      /* start: */ 0, /* range: */ len,
05663                      want_reg_info ? &regs : (struct re_registers *) 0);
05664 
05665     /* Copy the register information to the POSIX structure.  */
05666     if (want_reg_info)
05667         {
05668             if (ret >= 0)
05669                 {
05670                     unsigned r;
05671 
05672                     for (r = 0; r < nmatch; r++)
05673                         {
05674                             pmatch[r].rm_so = regs.start[r];
05675                             pmatch[r].rm_eo = regs.end[r];
05676                         }
05677                 }
05678 
05679             /* If we needed the temporary register info, free the space now.  */
05680             free (regs.start);
05681             free (regs.end);
05682         }
05683 
05684     /* We want zero return to mean success, unlike `re_search'.  */
05685     return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05686 }
05687 
05688 
05689 /* Returns a message corresponding to an error code, ERRCODE, returned
05690    from either regcomp or regexec.   We don't use PREG here.  */
05691 
05692 size_t
05693 regerror(int errcode,
05694          const regex_t *preg,
05695          char *errbuf,
05696          size_t errbuf_size)
05697 {
05698     const char *msg;
05699     size_t msg_size;
05700 
05701     if (errcode < 0
05702         || errcode >= (int) (sizeof (re_error_msgid)
05703                              / sizeof (re_error_msgid[0])))
05704         /* Only error codes returned by the rest of the code should be passed
05705            to this routine.  If we are given anything else, or if other regex
05706            code generates an invalid error code, then the program has a bug.
05707            Dump core so we can fix it.  */
05708         abort ();
05709 
05710     msg = gettext (re_error_msgid[errcode]);
05711 
05712     msg_size = strlen (msg) + 1; /* Includes the null.  */
05713 
05714     if (errbuf_size != 0)
05715         {
05716             if (msg_size > errbuf_size)
05717                 {
05718                     strncpy (errbuf, msg, errbuf_size - 1);
05719                     errbuf[errbuf_size - 1] = 0;
05720                 }
05721             else
05722                 strcpy (errbuf, msg);
05723         }
05724 
05725     return msg_size;
05726 }
05727 
05728 
05729 /* Free dynamically allocated space used by PREG.  */
05730 
05731 void
05732 regfree(regex_t *preg)
05733 {
05734     if (preg->buffer != NULL)
05735         free (preg->buffer);
05736     preg->buffer = NULL;
05737 
05738     preg->allocated = 0;
05739     preg->used = 0;
05740 
05741     if (preg->fastmap != NULL)
05742         free (preg->fastmap);
05743     preg->fastmap = NULL;
05744     preg->fastmap_accurate = 0;
05745 
05746     if (preg->translate != NULL)
05747         free (preg->translate);
05748     preg->translate = NULL;
05749 }
05750 
05751 #endif /* not emacs  */

Generated on Mon Mar 31 19:03:27 2008 for Graphviz by  doxygen 1.5.1