/misc/src/release/graphviz-2.18-1/src/graphviz-2.18/lib/graph/lexer.c

Go to the documentation of this file.
00001 /* $Id: lexer.c,v 1.12 2008/02/19 06:57:39 glenlow Exp $ $Revision: 1.12 $ */
00002 /* vim:set shiftwidth=4 ts=8: */
00003 
00004 /**********************************************************
00005 *      This software is part of the graphviz package      *
00006 *                http://www.graphviz.org/                 *
00007 *                                                         *
00008 *            Copyright (c) 1994-2004 AT&T Corp.           *
00009 *                and is licensed under the                *
00010 *            Common Public License, Version 1.0           *
00011 *                      by AT&T Corp.                      *
00012 *                                                         *
00013 *        Information and Software Systems Research        *
00014 *              AT&T Research, Florham Park NJ             *
00015 **********************************************************/
00016 
00017 
00018 #include <stdarg.h>
00019 #include "libgraph.h"
00020 #include "parser.h"
00021 #include "triefa.cP"
00022 #include "agxbuf.h"
00023 
00024 #ifdef DMALLOC
00025 #include "dmalloc.h"
00026 #endif
00027 
00028 #define InfileName (InputFile?InputFile:"<unknown>")
00029 
00030 static FILE *Lexer_fp;
00031 static char *LexPtr, *TokenBuf;
00032 static int LineBufSize;
00033 static unsigned char In_comment;
00034 static unsigned char Comment_start;
00035 static unsigned char Start_html_string;
00036 int Line_number;
00037 static char *InputFile;
00038 static gets_f Lexer_gets;
00039 
00040 static void
00041 storeFileName (char* fname, int len)
00042 {
00043     static int cnt;
00044     static char* buf;
00045 
00046     if (len > cnt) {
00047         if (cnt) buf = (char*)realloc (buf, len+1);
00048         else buf = (char*)malloc (len+1);
00049         cnt = len;
00050     }
00051     strcpy (buf, fname);
00052     InputFile = buf;
00053 }
00054 
00055   /* Reset line number.
00056    * Argument n is indexed from 1, so we decrement it.
00057    */
00058 void agreadline(int n)
00059 {
00060     Line_number = n - 1;
00061 }
00062 
00063 int aglinenumber ()
00064 {
00065     return Line_number;
00066 }
00067 
00068   /* (Re)set file:
00069    */
00070 void agsetfile(char *f)
00071 {
00072     InputFile = f;
00073     Line_number = 0;
00074 }
00075 
00076 void aglexinit(FILE * fp, gets_f mygets)
00077 {
00078     Lexer_fp = fp;
00079     Lexer_gets = mygets;
00080     LexPtr = NULL;
00081     if (AG.linebuf == NULL) {
00082         LineBufSize = BUFSIZ;
00083         AG.linebuf = N_NEW(LineBufSize, char);
00084         TokenBuf = N_NEW(LineBufSize, char);
00085     }
00086     (Lexer_gets) (AG.linebuf, 0, fp);   /* reset mygets */
00087 }
00088 
/* True for any non-NUL character that is whitespace or a control character.
 * The argument is parenthesized so that expression arguments group as the
 * caller wrote them; note that it is still evaluated more than once.
 */
#define ISSPACE(c) (((c) != 0) && (isspace(c) || iscntrl(c)))
00090 
00091 /* skip leading white space and comments in a string p
00092  * whitespace includes control characters
00093  */
00094 static char *skip_wscomments(char *pp)
00095 {
00096     unsigned char *p = (unsigned char *) pp;
00097     do {
00098         while (ISSPACE(*p))
00099             p++;
00100         while (In_comment && p[0]) {
00101             while (p[0] && (p[0] != '*'))
00102                 p++;
00103             if (p[0]) {
00104                 if (p[1] == '/') {
00105                     In_comment = FALSE;
00106                     p += 2;
00107                     break;
00108                 } else
00109                     p++;
00110             }
00111         }
00112         if (p[0] == '/') {
00113             if (p[1] == '/')
00114                 while (*p)
00115                     p++;        /* skip to end of line */
00116             else {
00117                 if (p[1] == '*') {
00118                     In_comment = TRUE;
00119                     Comment_start = Line_number;
00120                     p += 2;
00121                     continue;
00122                 } else
00123                     break;      /* return a slash */
00124             }
00125         } else {
00126             if (!ISSPACE(*p))
00127                 break;
00128         }
00129     } while (p[0]);
00130     return (char *) p;
00131 }
00132 
/* scan_token:
 * Copy the run of ISALNUM characters starting at p into token and
 * NUL-terminate it.
 * Returns a pointer to the first character that was not copied (the
 * character that ended the token), or NULL if p itself is NULL.
 * No length check is made; token must be large enough for the run.
 */
static char *scan_token(char *p, char *token)
{
    char *q = token;

    /* this is a null-pointer test: an empty string (*p == '\0') is handled
     * by the loop below, which then yields an empty token
     */
    if (p == NULL)
        return NULL;
    while (ISALNUM(*p)) {
        *q++ = *p++;
    }
    *q = '\0';
    return p;
}
00147 
00148 static char *scan_num(char *p, char *token)
00149 {
00150     unsigned char *q, *z;
00151     int saw_rp = FALSE;
00152     int saw_digit = FALSE;
00153 
00154     z = (unsigned char *) p;
00155     q = (unsigned char *) token;
00156     if (*z == '-')
00157         *q++ = *z++;
00158     if (*z == '.') {
00159         saw_rp = TRUE;
00160         *q++ = *z++;
00161     }
00162     while (isdigit(*z)) {
00163         saw_digit = TRUE;
00164         *q++ = *z++;
00165     }
00166     if ((*z == '.') && (saw_rp == FALSE)) {
00167         saw_rp = TRUE;
00168         *q++ = *z++;
00169         while (isdigit(*z)) {
00170             saw_digit = TRUE;
00171             *q++ = *z++;
00172         }
00173     }
00174     *q = '\0';
00175     if (saw_digit && *z && ((isalpha(*z)) || (*z == '_'))) {
00176         unsigned char *endp = z + 1;
00177         unsigned char c;
00178         while ((c = *endp) && ((isalpha(c)) || (c == '_')))
00179             endp++;
00180         *endp = '\0';
00181         agerr(AGWARN,
00182               "%s:%d: ambiguous \"%s\" splits into two names: \"%s\" and \"%s\"\n",
00183               InfileName, Line_number, p, token, z);
00184         *endp = c;
00185     }
00186 
00187     if (saw_digit == FALSE)
00188         z = NULL;
00189     return (char *) z;
00190 }
00191 
00192 /* scan a quoted string and return the position after its terminator */
00193 static char *quoted_string(char *p, char *token)
00194 {
00195     char quote, *q;
00196 
00197     quote = *p++;
00198     q = token;
00199     while ((*p) && (*p != quote)) {
00200         if (*p == '\\') {
00201             if (*(p + 1) == quote)
00202                 p++;
00203             else {
00204                 if (*(p + 1) == '\\')
00205                     *q++ = *p++;
00206             }
00207         }
00208         *q++ = *p++;
00209     }
00210     if (*p == '\0')
00211         agerr(AGWARN, "%s:%d: string ran past end of line\n",
00212               InfileName, Line_number);
00213     else
00214         p++;
00215     *q = 0;
00216     return p;
00217 }
00218 
00219 int myaglex(void)
00220 {                               /* for debugging */
00221     int rv = aglex();
00222     fprintf(stderr, "returning %d\n", rv);
00223     if (rv == T_symbol)
00224         fprintf(stderr, "string val is %s\n", aglval.str);
00225     return rv;
00226 }
00227 
00228 /*
00229  * Return a logical line in AG.linebuf.
00230  * In particular, the buffer will contain a '\n' as the last non-null char.
00231  * Ignore lines beginning with '#'; update cpp line number if applicable.
00232  * Fold long lines, i.e., ignore escaped newlines.
00233  * Assume the Lexer_gets function reads upto newline or buffer length
00234  * like fgets.
00235  * Need to be careful that Lexer_gets might not return full physical line
00236  * because buffer is too small to hold it.
00237  */
00238 static char *lex_gets(void)
00239 {
00240     char *clp;
00241     int len, curlen;
00242 
00243     len = curlen = 0;
00244 
00245     do {
00246         /* make sure there is room for at least another SMALLBUF worth */
00247         if (curlen + SMALLBUF >= LineBufSize) {
00248             LineBufSize += BUFSIZ;
00249             AG.linebuf = realloc(AG.linebuf, LineBufSize);
00250             TokenBuf = realloc(TokenBuf, LineBufSize);
00251         }
00252 
00253         /* off by one so we can back up in LineBuf */
00254         clp =
00255             (Lexer_gets) (AG.linebuf + curlen + 1,
00256                           LineBufSize - curlen - 1, Lexer_fp);
00257         if (clp == NULL)
00258             break;
00259 
00260 
00261         len = strlen(clp);      /* since clp != NULL, len > 0 */
00262         if (clp[len - 1] == '\n') {     /* have physical line */
00263             if ((clp[0] == '#') && (curlen == 0)) {
00264                 /* comment line or cpp line sync */
00265                 int r, cnt;
00266                 char buf[2];
00267                 char* s = clp + 1;
00268 
00269                 if (strncmp(s, "line", 4) == 0) s += 4;
00270                 r = sscanf(s, "%d %1[\"]%n", &Line_number, buf, &cnt);
00271                 if (r <= 0) Line_number++;
00272                 else { /* got line number */ 
00273                     Line_number--;
00274                     if (r > 1) { /* saw quote */
00275                         char* p = s + cnt;
00276                         char* e = p;
00277                         while (*e && (*e != '"')) e++; 
00278                         if (e != p) {
00279                             *e = '\0';
00280                             storeFileName (p, e-p);
00281                         }
00282                     }
00283                 }
00284                 clp[0] = 0;
00285                 len = 1;    /* this will make the while test below succeed */
00286                 continue;
00287             }
00288             Line_number++;
00289                 /* Note it is possible len == 1 and last character in
00290                  * previous read was '\\'
00291                  * It is also possible to have curlen=0, and read in
00292                  * "\\\n". 
00293                  */
00294             if (clp[len - 2] == '\\') { /* escaped newline */
00295                 len = len - 2;
00296                 clp[len] = '\0';
00297             }
00298         }
00299         curlen += len;
00300         /* the following test relies on having AG.linebuf[0] == '\0' */
00301     } while (clp[len - 1] != '\n');
00302 
00303     if (curlen > 0)
00304         return AG.linebuf + 1;
00305     else
00306         return NULL;
00307 }
00308 
00309 /* html_pair:
00310  * Iteratively scan nested "<...>"
00311  * p points to first character after initial '<'
00312  * Store characters up to but not including matching '>'
00313  * Return pointer to matching '>'
00314  * We do not check for any escape sequences; pure HTML is
00315  * expected, so special characters need to be HTML escapes.
00316  * We read them in and allow the HTML parser to convert them.
00317  */
00318 static char *html_pair(char *p, agxbuf * tokp)
00319 {
00320     unsigned char c;
00321     int rc, depth = 1;
00322 
00323     while (1) {
00324         while ((c = *p)) {
00325             if (c == '>') {
00326                 depth--;
00327                 if (depth == 0)
00328                     return p;   /* p points to closing > */
00329             } else if (c == '<')
00330                 depth++;
00331             rc = agxbputc(tokp, c);
00332             p++;
00333         }
00334         if ((p = lex_gets()) == NULL) {
00335             agerr(AGWARN,
00336                   "non-terminated HTML string starting line %d, file %s\n",
00337                   Start_html_string, InfileName);
00338             return 0;
00339         }
00340     }
00341 }
00342 
00343 /* html_string:
00344  * scan an html string and return the position after its terminator 
00345  * The string is stored in token.
00346  * p points to the opening <.
00347  */
00348 
00349 static char *html_string(char *p, agxbuf * token)
00350 {
00351     Start_html_string = Line_number;
00352     p = html_pair(p + 1, token);
00353     if (p)
00354         p++;                    /* skip closing '>' */
00355     return p;
00356 }
00357 
/* agtoken:
 * Feed the string p, one character at a time, through the TFA_* macros and
 * return what TFA_Definition() reports once the whole string has been
 * consumed.
 */
int agtoken(char *p)
{
    TFA_Init();
    for (; *p; p++) {
        char ch = *p;

        /* any non-ascii characters converted to ascii DEL (127) */
        TFA_Advance((ch & ~127) ? 127 : ch);
    }
    return TFA_Definition();
}
00369 
00370 int aglex(void)
00371 {
00372     int token;
00373     char *tbuf, *p;
00374     static unsigned char BOM[] = { 0xEF, 0xBB, 0xBF };  /* UTF-8 byte order marker */
00375 
00376     /* if the parser has accepted a graph, reset and return EOF */
00377     if (AG.accepting_state) {
00378         AG.accepting_state = FALSE;
00379         return EOF;
00380     }
00381 
00382     /* get a nonempty lex buffer */
00383     do {
00384         if ((LexPtr == NULL) || (LexPtr[0] == '\0'))
00385             if ((LexPtr = lex_gets()) == NULL) {
00386                 if (In_comment)
00387                     agerr(AGWARN, "nonterminated comment in line %d\n",
00388                           Comment_start);
00389                 return EOF;
00390             }
00391         /* skip UTF-8 Byte Order Marker if at beginning of file */
00392         if ((Line_number == 1) && !strncmp(LexPtr, (char *) BOM, 3))
00393             LexPtr += 3;
00394         LexPtr = (char *) skip_wscomments(LexPtr);
00395     } while (LexPtr[0] == '\0');
00396 
00397     tbuf = TokenBuf;
00398 
00399     /* scan quoted strings */
00400     if (LexPtr[0] == '\"') {
00401         LexPtr = quoted_string(LexPtr, tbuf);
00402         aglval.str = agstrdup(tbuf);
00403         return T_qsymbol;
00404     }
00405 
00406     /* scan HTML strings */
00407     if (LexPtr[0] == '<') {
00408         agxbuf xb;
00409         unsigned char htmlbuf[BUFSIZ];
00410         agxbinit(&xb, BUFSIZ, htmlbuf);
00411         LexPtr = html_string(LexPtr, &xb);
00412         aglval.str = agstrdup_html(agxbuse(&xb));
00413         agxbfree(&xb);
00414         return T_symbol;
00415     }
00416 
00417     /* scan edge operator */
00418     if (AG.edge_op
00419         && (strncmp(LexPtr, AG.edge_op, strlen(AG.edge_op)) == 0)) {
00420         LexPtr += strlen(AG.edge_op);
00421         return T_edgeop;
00422     }
00423 
00424     /* scan numbers */
00425     if ((p = scan_num(LexPtr, tbuf))) {
00426         LexPtr = p;
00427         aglval.str = agstrdup(tbuf);
00428         return T_symbol;
00429     } else {
00430         unsigned char uc = *(unsigned char *) LexPtr;
00431         if (ispunct(uc) && (uc != '_'))
00432             return *LexPtr++;
00433         else
00434             LexPtr = scan_token(LexPtr, tbuf);
00435     }
00436 
00437     /* scan other tokens */
00438     token = agtoken(tbuf);
00439     if (token == -1) {
00440         aglval.str = agstrdup(tbuf);
00441         token = T_symbol;
00442     }
00443     return token;
00444 }
00445 
00446 static void error_context(void)
00447 {
00448     char *p;
00449     char c;
00450     char *buf = AG.linebuf + 1; /* characters are always put at AG.linebuf[1] */
00451     /* or later; AG.linebuf[0] = '\0' */
00452 
00453     if (LexPtr == NULL)
00454         return;
00455     agerr(AGPREV, "context: ");
00456     for (p = LexPtr - 1; (p > buf) && (!isspace(*(unsigned char *) p));
00457          p--);
00458     if (buf < p) {
00459         c = *p;
00460         *p = '\0';
00461         agerr(AGPREV, buf);
00462         *p = c;
00463     }
00464     agerr(AGPREV, " >>> ");
00465     c = *LexPtr;
00466     *LexPtr = '\0';
00467     agerr(AGPREV, p);
00468     *LexPtr = c;
00469     agerr(AGPREV, " <<< ");
00470     agerr(AGPREV, LexPtr);
00471 }
00472 
00473 void agerror(char *msg)
00474 {
00475     if (AG.syntax_errors++)
00476         return;
00477     agerr(AGERR, "%s:%d: %s near line %d\n",
00478           InfileName, Line_number, msg, Line_number);
00479     error_context();
00480 }
00481 
00482 agerrlevel_t agerrno;           /* Level of the most recent message handled by agerr_va */
00483 static agerrlevel_t agerrlevel = AGWARN;        /* Messages at or above this level are printed on stderr */
00484 static long aglast;             /* Offset in agerrout where the latest non-AGPREV message starts */
00485 static FILE *agerrout;          /* Temporary file receiving messages below agerrlevel; created on first use */
00486 
00487 void agseterr(agerrlevel_t lvl)
00488 {
00489     agerrlevel = lvl;
00490 }
00491 
00492 char *aglasterr()
00493 {
00494     long endpos;
00495     long len;
00496     char *buf;
00497 
00498     if (!agerrout)
00499         return 0;
00500     fflush(agerrout);
00501     endpos = ftell(agerrout);
00502     len = endpos - aglast;
00503     buf = malloc(len + 1);
00504     fseek(agerrout, aglast, SEEK_SET);
00505     fread(buf, sizeof(char), len, agerrout);
00506     buf[len] = '\0';
00507     fseek(agerrout, endpos, SEEK_SET);
00508 
00509     return buf;
00510 }
00511 
00512 static int agerr_va(agerrlevel_t level, char *fmt, va_list args)
00513 {
00514     agerrlevel_t lvl;
00515 
00516     lvl = (level == AGPREV ? agerrno : (level == AGMAX) ? AGERR : level);
00517 
00518     agerrno = lvl;
00519     if (lvl >= agerrlevel) {
00520         if (level != AGPREV)
00521             fprintf(stderr, "%s: ",
00522                     (level == AGERR) ? "Error" : "Warning");
00523         vfprintf(stderr, fmt, args);
00524         va_end(args);
00525         return 0;
00526     }
00527 
00528     if (!agerrout) {
00529         agerrout = tmpfile();
00530         if (!agerrout)
00531             return 1;
00532     }
00533 
00534     if (level != AGPREV)
00535         aglast = ftell(agerrout);
00536     vfprintf(agerrout, fmt, args);
00537     va_end(args);
00538     return 0;
00539 }
00540 
00541 int agerr(agerrlevel_t level, char *fmt, ...)
00542 {
00543     va_list args;
00544 
00545     va_start(args, fmt);
00546     return agerr_va(level, fmt, args);
00547 }
00548 
00549 void agerrorf(char *fmt, ...)
00550 {
00551     va_list args;
00552 
00553     va_start(args, fmt);
00554     agerr_va(AGERR, fmt, args);
00555 }
00556 
00557 void agwarningf(char *fmt, ...)
00558 {
00559     va_list args;
00560 
00561     va_start(args, fmt);
00562     agerr_va(AGWARN, fmt, args);
00563 }

Generated on Mon Mar 31 19:03:26 2008 for Graphviz by  doxygen 1.5.1