00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdarg.h>
00019 #include "libgraph.h"
00020 #include "parser.h"
00021 #include "triefa.cP"
00022 #include "agxbuf.h"
00023
00024 #ifdef DMALLOC
00025 #include "dmalloc.h"
00026 #endif
00027
00028 #define InfileName (InputFile?InputFile:"<unknown>")
00029
00030 static FILE *Lexer_fp;
00031 static char *LexPtr, *TokenBuf;
00032 static int LineBufSize;
00033 static unsigned char In_comment;
00034 static unsigned char Comment_start;
00035 static unsigned char Start_html_string;
00036 int Line_number;
00037 static char *InputFile;
00038 static gets_f Lexer_gets;
00039
00040 static void
00041 storeFileName (char* fname, int len)
00042 {
00043 static int cnt;
00044 static char* buf;
00045
00046 if (len > cnt) {
00047 if (cnt) buf = (char*)realloc (buf, len+1);
00048 else buf = (char*)malloc (len+1);
00049 cnt = len;
00050 }
00051 strcpy (buf, fname);
00052 InputFile = buf;
00053 }
00054
00055
00056
00057
00058 void agreadline(int n)
00059 {
00060 Line_number = n - 1;
00061 }
00062
00063 int aglinenumber ()
00064 {
00065 return Line_number;
00066 }
00067
00068
00069
00070 void agsetfile(char *f)
00071 {
00072 InputFile = f;
00073 Line_number = 0;
00074 }
00075
00076 void aglexinit(FILE * fp, gets_f mygets)
00077 {
00078 Lexer_fp = fp;
00079 Lexer_gets = mygets;
00080 LexPtr = NULL;
00081 if (AG.linebuf == NULL) {
00082 LineBufSize = BUFSIZ;
00083 AG.linebuf = N_NEW(LineBufSize, char);
00084 TokenBuf = N_NEW(LineBufSize, char);
00085 }
00086 (Lexer_gets) (AG.linebuf, 0, fp);
00087 }
00088
/*
 * True when c is a non-NUL whitespace or control character.
 * The argument is parenthesized so that an expression argument is compared
 * as a whole; note that it is still evaluated more than once, so it must
 * not have side effects.
 */
#define ISSPACE(c) (((c) != 0) && (isspace(c) || iscntrl(c)))
00090
00091
00092
00093
00094 static char *skip_wscomments(char *pp)
00095 {
00096 unsigned char *p = (unsigned char *) pp;
00097 do {
00098 while (ISSPACE(*p))
00099 p++;
00100 while (In_comment && p[0]) {
00101 while (p[0] && (p[0] != '*'))
00102 p++;
00103 if (p[0]) {
00104 if (p[1] == '/') {
00105 In_comment = FALSE;
00106 p += 2;
00107 break;
00108 } else
00109 p++;
00110 }
00111 }
00112 if (p[0] == '/') {
00113 if (p[1] == '/')
00114 while (*p)
00115 p++;
00116 else {
00117 if (p[1] == '*') {
00118 In_comment = TRUE;
00119 Comment_start = Line_number;
00120 p += 2;
00121 continue;
00122 } else
00123 break;
00124 }
00125 } else {
00126 if (!ISSPACE(*p))
00127 break;
00128 }
00129 } while (p[0]);
00130 return (char *) p;
00131 }
00132
00133
/*
 * scan_token:
 * Copy the longest run of characters accepted by ISALNUM from p into token
 * and NUL-terminate it.
 *
 * Returns the position just past the copied run, or NULL if p is NULL (in
 * which case token is set to the empty string). token must be large enough
 * for the run plus the terminator.
 */
static char *scan_token(char *p, char *token)
{
    char *q = token;

    *q = '\0';
    if (p == NULL)
	return NULL;
    while (ISALNUM(*p)) {
	*q++ = *p++;
    }
    *q = '\0';
    return p;
}
00147
00148 static char *scan_num(char *p, char *token)
00149 {
00150 unsigned char *q, *z;
00151 int saw_rp = FALSE;
00152 int saw_digit = FALSE;
00153
00154 z = (unsigned char *) p;
00155 q = (unsigned char *) token;
00156 if (*z == '-')
00157 *q++ = *z++;
00158 if (*z == '.') {
00159 saw_rp = TRUE;
00160 *q++ = *z++;
00161 }
00162 while (isdigit(*z)) {
00163 saw_digit = TRUE;
00164 *q++ = *z++;
00165 }
00166 if ((*z == '.') && (saw_rp == FALSE)) {
00167 saw_rp = TRUE;
00168 *q++ = *z++;
00169 while (isdigit(*z)) {
00170 saw_digit = TRUE;
00171 *q++ = *z++;
00172 }
00173 }
00174 *q = '\0';
00175 if (saw_digit && *z && ((isalpha(*z)) || (*z == '_'))) {
00176 unsigned char *endp = z + 1;
00177 unsigned char c;
00178 while ((c = *endp) && ((isalpha(c)) || (c == '_')))
00179 endp++;
00180 *endp = '\0';
00181 agerr(AGWARN,
00182 "%s:%d: ambiguous \"%s\" splits into two names: \"%s\" and \"%s\"\n",
00183 InfileName, Line_number, p, token, z);
00184 *endp = c;
00185 }
00186
00187 if (saw_digit == FALSE)
00188 z = NULL;
00189 return (char *) z;
00190 }
00191
00192
00193 static char *quoted_string(char *p, char *token)
00194 {
00195 char quote, *q;
00196
00197 quote = *p++;
00198 q = token;
00199 while ((*p) && (*p != quote)) {
00200 if (*p == '\\') {
00201 if (*(p + 1) == quote)
00202 p++;
00203 else {
00204 if (*(p + 1) == '\\')
00205 *q++ = *p++;
00206 }
00207 }
00208 *q++ = *p++;
00209 }
00210 if (*p == '\0')
00211 agerr(AGWARN, "%s:%d: string ran past end of line\n",
00212 InfileName, Line_number);
00213 else
00214 p++;
00215 *q = 0;
00216 return p;
00217 }
00218
00219 int myaglex(void)
00220 {
00221 int rv = aglex();
00222 fprintf(stderr, "returning %d\n", rv);
00223 if (rv == T_symbol)
00224 fprintf(stderr, "string val is %s\n", aglval.str);
00225 return rv;
00226 }
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238 static char *lex_gets(void)
00239 {
00240 char *clp;
00241 int len, curlen;
00242
00243 len = curlen = 0;
00244
00245 do {
00246
00247 if (curlen + SMALLBUF >= LineBufSize) {
00248 LineBufSize += BUFSIZ;
00249 AG.linebuf = realloc(AG.linebuf, LineBufSize);
00250 TokenBuf = realloc(TokenBuf, LineBufSize);
00251 }
00252
00253
00254 clp =
00255 (Lexer_gets) (AG.linebuf + curlen + 1,
00256 LineBufSize - curlen - 1, Lexer_fp);
00257 if (clp == NULL)
00258 break;
00259
00260
00261 len = strlen(clp);
00262 if (clp[len - 1] == '\n') {
00263 if ((clp[0] == '#') && (curlen == 0)) {
00264
00265 int r, cnt;
00266 char buf[2];
00267 char* s = clp + 1;
00268
00269 if (strncmp(s, "line", 4) == 0) s += 4;
00270 r = sscanf(s, "%d %1[\"]%n", &Line_number, buf, &cnt);
00271 if (r <= 0) Line_number++;
00272 else {
00273 Line_number--;
00274 if (r > 1) {
00275 char* p = s + cnt;
00276 char* e = p;
00277 while (*e && (*e != '"')) e++;
00278 if (e != p) {
00279 *e = '\0';
00280 storeFileName (p, e-p);
00281 }
00282 }
00283 }
00284 clp[0] = 0;
00285 len = 1;
00286 continue;
00287 }
00288 Line_number++;
00289
00290
00291
00292
00293
00294 if (clp[len - 2] == '\\') {
00295 len = len - 2;
00296 clp[len] = '\0';
00297 }
00298 }
00299 curlen += len;
00300
00301 } while (clp[len - 1] != '\n');
00302
00303 if (curlen > 0)
00304 return AG.linebuf + 1;
00305 else
00306 return NULL;
00307 }
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317
00318 static char *html_pair(char *p, agxbuf * tokp)
00319 {
00320 unsigned char c;
00321 int rc, depth = 1;
00322
00323 while (1) {
00324 while ((c = *p)) {
00325 if (c == '>') {
00326 depth--;
00327 if (depth == 0)
00328 return p;
00329 } else if (c == '<')
00330 depth++;
00331 rc = agxbputc(tokp, c);
00332 p++;
00333 }
00334 if ((p = lex_gets()) == NULL) {
00335 agerr(AGWARN,
00336 "non-terminated HTML string starting line %d, file %s\n",
00337 Start_html_string, InfileName);
00338 return 0;
00339 }
00340 }
00341 }
00342
00343
00344
00345
00346
00347
00348
00349 static char *html_string(char *p, agxbuf * token)
00350 {
00351 Start_html_string = Line_number;
00352 p = html_pair(p + 1, token);
00353 if (p)
00354 p++;
00355 return p;
00356 }
00357
/*
 * agtoken:
 * Run the string p through the TFA_* recognizer and return the result of
 * TFA_Definition().
 * Any character with a bit set outside the low seven is fed as 127.
 */
int agtoken(char *p)
{
    char ch;

    TFA_Init();
    for (; *p != '\0'; p++) {
	ch = *p;
	TFA_Advance(ch & ~127 ? 127 : ch);
    }
    return TFA_Definition();
}
00369
00370 int aglex(void)
00371 {
00372 int token;
00373 char *tbuf, *p;
00374 static unsigned char BOM[] = { 0xEF, 0xBB, 0xBF };
00375
00376
00377 if (AG.accepting_state) {
00378 AG.accepting_state = FALSE;
00379 return EOF;
00380 }
00381
00382
00383 do {
00384 if ((LexPtr == NULL) || (LexPtr[0] == '\0'))
00385 if ((LexPtr = lex_gets()) == NULL) {
00386 if (In_comment)
00387 agerr(AGWARN, "nonterminated comment in line %d\n",
00388 Comment_start);
00389 return EOF;
00390 }
00391
00392 if ((Line_number == 1) && !strncmp(LexPtr, (char *) BOM, 3))
00393 LexPtr += 3;
00394 LexPtr = (char *) skip_wscomments(LexPtr);
00395 } while (LexPtr[0] == '\0');
00396
00397 tbuf = TokenBuf;
00398
00399
00400 if (LexPtr[0] == '\"') {
00401 LexPtr = quoted_string(LexPtr, tbuf);
00402 aglval.str = agstrdup(tbuf);
00403 return T_qsymbol;
00404 }
00405
00406
00407 if (LexPtr[0] == '<') {
00408 agxbuf xb;
00409 unsigned char htmlbuf[BUFSIZ];
00410 agxbinit(&xb, BUFSIZ, htmlbuf);
00411 LexPtr = html_string(LexPtr, &xb);
00412 aglval.str = agstrdup_html(agxbuse(&xb));
00413 agxbfree(&xb);
00414 return T_symbol;
00415 }
00416
00417
00418 if (AG.edge_op
00419 && (strncmp(LexPtr, AG.edge_op, strlen(AG.edge_op)) == 0)) {
00420 LexPtr += strlen(AG.edge_op);
00421 return T_edgeop;
00422 }
00423
00424
00425 if ((p = scan_num(LexPtr, tbuf))) {
00426 LexPtr = p;
00427 aglval.str = agstrdup(tbuf);
00428 return T_symbol;
00429 } else {
00430 unsigned char uc = *(unsigned char *) LexPtr;
00431 if (ispunct(uc) && (uc != '_'))
00432 return *LexPtr++;
00433 else
00434 LexPtr = scan_token(LexPtr, tbuf);
00435 }
00436
00437
00438 token = agtoken(tbuf);
00439 if (token == -1) {
00440 aglval.str = agstrdup(tbuf);
00441 token = T_symbol;
00442 }
00443 return token;
00444 }
00445
00446 static void error_context(void)
00447 {
00448 char *p;
00449 char c;
00450 char *buf = AG.linebuf + 1;
00451
00452
00453 if (LexPtr == NULL)
00454 return;
00455 agerr(AGPREV, "context: ");
00456 for (p = LexPtr - 1; (p > buf) && (!isspace(*(unsigned char *) p));
00457 p--);
00458 if (buf < p) {
00459 c = *p;
00460 *p = '\0';
00461 agerr(AGPREV, buf);
00462 *p = c;
00463 }
00464 agerr(AGPREV, " >>> ");
00465 c = *LexPtr;
00466 *LexPtr = '\0';
00467 agerr(AGPREV, p);
00468 *LexPtr = c;
00469 agerr(AGPREV, " <<< ");
00470 agerr(AGPREV, LexPtr);
00471 }
00472
00473 void agerror(char *msg)
00474 {
00475 if (AG.syntax_errors++)
00476 return;
00477 agerr(AGERR, "%s:%d: %s near line %d\n",
00478 InfileName, Line_number, msg, Line_number);
00479 error_context();
00480 }
00481
00482 agerrlevel_t agerrno;
00483 static agerrlevel_t agerrlevel = AGWARN;
00484 static long aglast;
00485 static FILE *agerrout;
00486
00487 void agseterr(agerrlevel_t lvl)
00488 {
00489 agerrlevel = lvl;
00490 }
00491
00492 char *aglasterr()
00493 {
00494 long endpos;
00495 long len;
00496 char *buf;
00497
00498 if (!agerrout)
00499 return 0;
00500 fflush(agerrout);
00501 endpos = ftell(agerrout);
00502 len = endpos - aglast;
00503 buf = malloc(len + 1);
00504 fseek(agerrout, aglast, SEEK_SET);
00505 fread(buf, sizeof(char), len, agerrout);
00506 buf[len] = '\0';
00507 fseek(agerrout, endpos, SEEK_SET);
00508
00509 return buf;
00510 }
00511
00512 static int agerr_va(agerrlevel_t level, char *fmt, va_list args)
00513 {
00514 agerrlevel_t lvl;
00515
00516 lvl = (level == AGPREV ? agerrno : (level == AGMAX) ? AGERR : level);
00517
00518 agerrno = lvl;
00519 if (lvl >= agerrlevel) {
00520 if (level != AGPREV)
00521 fprintf(stderr, "%s: ",
00522 (level == AGERR) ? "Error" : "Warning");
00523 vfprintf(stderr, fmt, args);
00524 va_end(args);
00525 return 0;
00526 }
00527
00528 if (!agerrout) {
00529 agerrout = tmpfile();
00530 if (!agerrout)
00531 return 1;
00532 }
00533
00534 if (level != AGPREV)
00535 aglast = ftell(agerrout);
00536 vfprintf(agerrout, fmt, args);
00537 va_end(args);
00538 return 0;
00539 }
00540
00541 int agerr(agerrlevel_t level, char *fmt, ...)
00542 {
00543 va_list args;
00544
00545 va_start(args, fmt);
00546 return agerr_va(level, fmt, args);
00547 }
00548
00549 void agerrorf(char *fmt, ...)
00550 {
00551 va_list args;
00552
00553 va_start(args, fmt);
00554 agerr_va(AGERR, fmt, args);
00555 }
00556
00557 void agwarningf(char *fmt, ...)
00558 {
00559 va_list args;
00560
00561 va_start(args, fmt);
00562 agerr_va(AGWARN, fmt, args);
00563 }