/misc/src/release/graphviz-2.18-1/src/graphviz-2.18/lib/common/htmllex.c

Go to the documentation of this file.
00001 /* $Id: htmllex.c,v 1.15 2007/11/02 22:15:52 erg Exp $ $Revision: 1.15 $ */
00002 /* vim:set shiftwidth=4 ts=8: */
00003 
00004 /**********************************************************
00005 *      This software is part of the graphviz package      *
00006 *                http://www.graphviz.org/                 *
00007 *                                                         *
00008 *            Copyright (c) 1994-2004 AT&T Corp.           *
00009 *                and is licensed under the                *
00010 *            Common Public License, Version 1.0           *
00011 *                      by AT&T Corp.                      *
00012 *                                                         *
00013 *        Information and Software Systems Research        *
00014 *              AT&T Research, Florham Park NJ             *
00015 **********************************************************/
00016 
00017 
00018 #include "render.h"
00019 #include "htmltable.h"
00020 #include "htmlparse.h"
00021 #include "htmllex.h"
00022 #include <ctype.h>
00023 
00024 #ifdef HAVE_LIBEXPAT
00025 #include <expat.h>
00026 #endif
00027 
00028 #ifndef XML_STATUS_ERROR
00029 #define XML_STATUS_ERROR 0
00030 #endif
00031 
00032 typedef struct {
00033 #ifdef HAVE_LIBEXPAT
00034     XML_Parser parser;
00035 #endif
00036     char* ptr;                  /* input source */
00037     int tok;                    /* token type   */
00038     agxbuf* xb;                 /* buffer to gather T_string data */
00039     agxbuf  lb;                 /* buffer for translating lexical data */
00040     char warn;                  /* set if warning given */
00041     char error;                 /* set if error given */
00042     char inCell;                /* set if in TD to allow T_string */
00043     char mode;                  /* for handling artificial <HTML>..</HTML> */
00044     char *currtok;              /* for error reporting */
00045     char *prevtok;              /* for error reporting */
00046     int currtoklen;
00047     int prevtoklen;
00048 } lexstate_t;
00049 static lexstate_t state;
00050 
00051 /* error_context:
00052  * Print the last 2 "token"s seen.
00053  */
00054 static void error_context(void)
00055 {
00056     agxbclear(state.xb);
00057     if (state.prevtoklen > 0)
00058         agxbput_n(state.xb, state.prevtok, state.prevtoklen);
00059     agxbput_n(state.xb, state.currtok, state.currtoklen);
00060     agerr(AGPREV, "... %s ...\n", agxbuse(state.xb));
00061 }
00062 
00063 /* htmlerror:
00064  * yyerror - called by yacc output
00065  */
00066 void htmlerror(const char *msg)
00067 {
00068     if (state.error)
00069         return;
00070     state.error = 1;
00071     agerr(AGERR, "%s in line %d \n", msg, htmllineno());
00072     error_context();
00073 }
00074 
00075 #ifdef HAVE_LIBEXPAT
00076 /* lexerror:
00077  * called by lexer when unknown <..> is found.
00078  */
00079 static void lexerror(const char *name)
00080 {
00081     state.tok = T_error;
00082     state.error = 1;
00083     agerr(AGERR, "Unknown HTML element <%s> on line %d \n",
00084           name, htmllineno());
00085 }
00086 
/* Handler invoked for a matched attribute: (target object, value text).
 * The result is OR-ed into state.warn by doAttrs.
 */
typedef int (*attrFn) (void *, char *);
/* Comparison function type expected by bsearch. */
typedef int (*bcmpfn) (const void *, const void *);

/* Range limits used when validating integer attribute values. */
#define MAX_CHAR    (((unsigned char)(~0)) >> 1)
#define MIN_CHAR    ((signed char)(~MAX_CHAR))
#define MAX_UCHAR   ((unsigned char)(~0))
#define MAX_USHORT  ((unsigned short)(~0))

/* One entry of an attribute dispatch table. */
typedef struct {
    char *name;                 /* attribute name used as search key */
    attrFn action;              /* handler run when name matches */
} attr_item;

/* Size of one table entry, used for bsearch and element counts. */
#define ISIZE (sizeof(attr_item))
00102 
00103 /* icmp:
00104  * Compare two attr_item. Used in bsearch
00105  */
00106 static int icmp(attr_item * i, attr_item * j)
00107 {
00108     return strcasecmp(i->name, j->name);
00109 }
00110 
00111 static int bgcolorfn(htmldata_t * p, char *v)
00112 {
00113     p->bgcolor = strdup(v);
00114     return 0;
00115 }
00116 
00117 static int hreffn(htmldata_t * p, char *v)
00118 {
00119     p->href = strdup(v);
00120     return 0;
00121 }
00122 
00123 static int titlefn(htmldata_t * p, char *v)
00124 {
00125     p->title = strdup(v);
00126     return 0;
00127 }
00128 
00129 static int portfn(htmldata_t * p, char *v)
00130 {
00131     p->port = strdup(v);
00132     return 0;
00133 }
00134 
00135 static int targetfn(htmldata_t * p, char *v)
00136 {
00137     p->target = strdup(v);
00138     return 0;
00139 }
00140 
00141 /* doInt:
00142  * Scan v for integral value. Check that
00143  * the value is >= min and <= max. Return value in ul.
00144  * String s is name of value.
00145  * Return 0 if okay; 1 otherwise.
00146  */
00147 static int doInt(char *v, char *s, int min, int max, long *ul)
00148 {
00149     int rv = 0;
00150     char *ep;
00151     long b = strtol(v, &ep, 10);
00152 
00153     if (ep == v) {
00154         agerr(AGWARN, "Improper %s value %s - ignored", s, v);
00155         rv = 1;
00156     } else if (b > max) {
00157         agerr(AGWARN, "%s value %s > %d - too large - ignored", s, v, max);
00158         rv = 1;
00159     } else if (b < min) {
00160         agerr(AGWARN, "%s value %s < %d - too small - ignored", s, v, min);
00161         rv = 1;
00162     } else
00163         *ul = b;
00164     return rv;
00165 }
00166 
00167 static int borderfn(htmldata_t * p, char *v)
00168 {
00169     long u;
00170 
00171     if (doInt(v, "BORDER", 0, MAX_UCHAR, &u))
00172         return 1;
00173     p->border = (unsigned char) u;
00174     p->flags |= BORDER_SET;
00175     return 0;
00176 }
00177 
00178 static int cellpaddingfn(htmldata_t * p, char *v)
00179 {
00180     long u;
00181 
00182     if (doInt(v, "CELLPADDING", 0, MAX_UCHAR, &u))
00183         return 1;
00184     p->pad = (unsigned char) u;
00185     p->flags |= PAD_SET;
00186     return 0;
00187 }
00188 
00189 static int cellspacingfn(htmldata_t * p, char *v)
00190 {
00191     long u;
00192 
00193     if (doInt(v, "CELLSPACING", MIN_CHAR, MAX_CHAR, &u))
00194         return 1;
00195     p->space = (signed char) u;
00196     p->flags |= SPACE_SET;
00197     return 0;
00198 }
00199 
00200 static int cellborderfn(htmltbl_t * p, char *v)
00201 {
00202     long u;
00203 
00204     if (doInt(v, "CELLSBORDER", 0, MAX_CHAR, &u))
00205         return 1;
00206     p->cb = (unsigned char) u;
00207     return 0;
00208 }
00209 
00210 static int fixedsizefn(htmldata_t * p, char *v)
00211 {
00212     int rv = 0;
00213     char c = toupper(*(unsigned char *) v);
00214     if ((c == 'T') && !strcasecmp(v + 1, "RUE"))
00215         p->flags |= FIXED_FLAG;
00216     else if ((c != 'F') || strcasecmp(v + 1, "ALSE")) {
00217         agerr(AGWARN, "Illegal value %s for FIXEDSIZE - ignored\n", v);
00218         rv = 1;
00219     }
00220     return rv;
00221 }
00222 
00223 static int valignfn(htmldata_t * p, char *v)
00224 {
00225     int rv = 0;
00226     char c = toupper(*v);
00227     if ((c == 'B') && !strcasecmp(v + 1, "OTTOM"))
00228         p->flags |= VALIGN_BOTTOM;
00229     else if ((c == 'T') && !strcasecmp(v + 1, "OP"))
00230         p->flags |= VALIGN_TOP;
00231     else if ((c != 'M') || strcasecmp(v + 1, "IDDLE")) {
00232         agerr(AGWARN, "Illegal value %s for VALIGN - ignored\n", v);
00233         rv = 1;
00234     }
00235     return rv;
00236 }
00237 
00238 static int halignfn(htmldata_t * p, char *v)
00239 {
00240     int rv = 0;
00241     char c = toupper(*v);
00242     if ((c == 'L') && !strcasecmp(v + 1, "EFT"))
00243         p->flags |= HALIGN_LEFT;
00244     else if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
00245         p->flags |= HALIGN_RIGHT;
00246     else if ((c != 'C') || strcasecmp(v + 1, "ENTER")) {
00247         agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n", v);
00248         rv = 1;
00249     }
00250     return rv;
00251 }
00252 
00253 static int cell_halignfn(htmldata_t * p, char *v)
00254 {
00255     int rv = 0;
00256     char c = toupper(*v);
00257     if ((c == 'L') && !strcasecmp(v + 1, "EFT"))
00258         p->flags |= HALIGN_LEFT;
00259     else if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
00260         p->flags |= HALIGN_RIGHT;
00261     else if ((c == 'T') && !strcasecmp(v + 1, "EXT"))
00262         p->flags |= HALIGN_TEXT;
00263     else if ((c != 'C') || strcasecmp(v + 1, "ENTER"))
00264         rv = 1;
00265     if (rv)
00266         agerr(AGWARN, "Illegal value %s for ALIGN in TD - ignored\n", v);
00267     return rv;
00268 }
00269 
00270 static int balignfn(htmldata_t * p, char *v)
00271 {
00272     int rv = 0;
00273     char c = toupper(*v);
00274     if ((c == 'L') && !strcasecmp(v + 1, "EFT"))
00275         p->flags |= BALIGN_LEFT;
00276     else if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
00277         p->flags |= BALIGN_RIGHT;
00278     else if ((c != 'C') || strcasecmp(v + 1, "ENTER"))
00279         rv = 1;
00280     if (rv)
00281         agerr(AGWARN, "Illegal value %s for BALIGN in TD - ignored\n", v);
00282     return rv;
00283 }
00284 
00285 static int heightfn(htmldata_t * p, char *v)
00286 {
00287     long u;
00288 
00289     if (doInt(v, "HEIGHT", 0, MAX_USHORT, &u))
00290         return 1;
00291     p->height = (unsigned short) u;
00292     return 0;
00293 }
00294 
00295 static int widthfn(htmldata_t * p, char *v)
00296 {
00297     long u;
00298 
00299     if (doInt(v, "WIDTH", 0, MAX_USHORT, &u))
00300         return 1;
00301     p->width = (unsigned short) u;
00302     return 0;
00303 }
00304 
00305 static int rowspanfn(htmlcell_t * p, char *v)
00306 {
00307     long u;
00308 
00309     if (doInt(v, "ROWSPAN", 0, MAX_USHORT, &u))
00310         return 1;
00311     if (u == 0) {
00312         agerr(AGWARN, "ROWSPAN value cannot be 0 - ignored\n");
00313         return 1;
00314     }
00315     p->rspan = (unsigned char) u;
00316     return 0;
00317 }
00318 
00319 static int colspanfn(htmlcell_t * p, char *v)
00320 {
00321     long u;
00322 
00323     if (doInt(v, "COLSPAN", 0, MAX_USHORT, &u))
00324         return 1;
00325     if (u == 0) {
00326         agerr(AGWARN, "COLSPAN value cannot be 0 - ignored\n");
00327         return 1;
00328     }
00329     p->cspan = (unsigned char) u;
00330     return 0;
00331 }
00332 
00333 static int colorfn(htmlfont_t * p, char *v)
00334 {
00335     p->color = strdup(v);
00336     return 0;
00337 }
00338 
00339 static int facefn(htmlfont_t * p, char *v)
00340 {
00341     p->name = strdup(v);
00342     return 0;
00343 }
00344 
00345 static int ptsizefn(htmlfont_t * p, char *v)
00346 {
00347     long u;
00348 
00349     if (doInt(v, "POINT-SIZE", 0, MAX_UCHAR, &u))
00350         return 1;
00351     p->size = (double) u;
00352     return 0;
00353 }
00354 
00355 static int srcfn(htmlimg_t * p, char *v)
00356 {
00357     p->src = strdup(v);
00358     return 0;
00359 }
00360 
00361 static int scalefn(htmlimg_t * p, char *v)
00362 {
00363     p->scale = strdup(v);
00364     return 0;
00365 }
00366 
00367 static int alignfn(int *p, char *v)
00368 {
00369     int rv = 0;
00370     char c = toupper(*v);
00371     if ((c == 'R') && !strcasecmp(v + 1, "IGHT"))
00372         *p = 'r';
00373     else if ((c == 'L') || !strcasecmp(v + 1, "EFT"))
00374         *p = 'l';
00375     else if ((c == 'C') || strcasecmp(v + 1, "ENTER")) 
00376         *p = 'n';
00377     else {
00378         agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n", v);
00379         rv = 1;
00380     }
00381     return rv;
00382 }
00383 
00384 /* Tables used in binary search; MUST be alphabetized */
00385 static attr_item tbl_items[] = {
00386     {"align", (attrFn) halignfn},
00387     {"bgcolor", (attrFn) bgcolorfn},
00388     {"border", (attrFn) borderfn},
00389     {"cellborder", (attrFn) cellborderfn},
00390     {"cellpadding", (attrFn) cellpaddingfn},
00391     {"cellspacing", (attrFn) cellspacingfn},
00392     {"fixedsize", (attrFn) fixedsizefn},
00393     {"height", (attrFn) heightfn},
00394     {"href", (attrFn) hreffn},
00395     {"port", (attrFn) portfn},
00396     {"target", (attrFn) targetfn},
00397     {"title", (attrFn) titlefn},
00398     {"tooltip", (attrFn) titlefn},
00399     {"valign", (attrFn) valignfn},
00400     {"width", (attrFn) widthfn},
00401 };
00402 
00403 static attr_item cell_items[] = {
00404     {"align", (attrFn) cell_halignfn},
00405     {"balign", (attrFn) balignfn},
00406     {"bgcolor", (attrFn) bgcolorfn},
00407     {"border", (attrFn) borderfn},
00408     {"cellpadding", (attrFn) cellpaddingfn},
00409     {"cellspacing", (attrFn) cellspacingfn},
00410     {"colspan", (attrFn) colspanfn},
00411     {"fixedsize", (attrFn) fixedsizefn},
00412     {"height", (attrFn) heightfn},
00413     {"href", (attrFn) hreffn},
00414     {"port", (attrFn) portfn},
00415     {"rowspan", (attrFn) rowspanfn},
00416     {"target", (attrFn) targetfn},
00417     {"title", (attrFn) titlefn},
00418     {"tooltip", (attrFn) titlefn},
00419     {"valign", (attrFn) valignfn},
00420     {"width", (attrFn) widthfn},
00421 };
00422 
00423 static attr_item font_items[] = {
00424     {"color", (attrFn) colorfn},
00425     {"face", (attrFn) facefn},
00426     {"point-size", (attrFn) ptsizefn},
00427 };
00428 
00429 static attr_item img_items[] = {
00430     {"scale", (attrFn) scalefn},
00431     {"src", (attrFn) srcfn},
00432 };
00433 
00434 static attr_item br_items[] = {
00435     {"align", (attrFn) alignfn},
00436 };
00437 
00438 /* doAttrs:
00439  * General function for processing list of name/value attributes.
00440  * Do binary search on items table. If match found, invoke action
00441  * passing it tp and attribute value.
00442  * Table size is given by nel
00443  * Name/value pairs are in array atts, which is null terminated.
00444  * s is the name of the HTML element being processed.
00445  */
00446 static void
00447 doAttrs(void *tp, attr_item * items, int nel, char **atts, char *s)
00448 {
00449     char *name;
00450     char *val;
00451     attr_item *ip;
00452     attr_item key;
00453 
00454     while ((name = *atts++) != NULL) {
00455         val = *atts++;
00456         key.name = name;
00457         ip = (attr_item *) bsearch(&key, items, nel, ISIZE, (bcmpfn) icmp);
00458         if (ip)
00459             state.warn |= ip->action(tp, val);
00460         else {
00461             agerr(AGWARN, "Illegal attribute %s in %s - ignored\n", name,
00462                   s);
00463             state.warn = 1;
00464         }
00465     }
00466 }
00467 
00468 static void mkBR(char **atts)
00469 {
00470     htmllval.i = UNSET_ALIGN;
00471     doAttrs(&htmllval.i, br_items, sizeof(br_items) / ISIZE, atts, "<BR>");
00472 }
00473 
00474 static htmlimg_t *mkImg(char **atts)
00475 {
00476     htmlimg_t *img = NEW(htmlimg_t);
00477 
00478     doAttrs(img, img_items, sizeof(img_items) / ISIZE, atts, "<IMG>");
00479 
00480     return img;
00481 }
00482 
00483 static htmlfont_t *mkFont(char **atts)
00484 {
00485     htmlfont_t *font = NEW(htmlfont_t);
00486 
00487     font->size = -1.0;          /* unassigned */
00488     doAttrs(font, font_items, sizeof(font_items) / ISIZE, atts, "<FONT>");
00489 
00490     return font;
00491 }
00492 
00493 static htmlcell_t *mkCell(char **atts)
00494 {
00495     htmlcell_t *cell = NEW(htmlcell_t);
00496 
00497     cell->cspan = 1;
00498     cell->rspan = 1;
00499     doAttrs(cell, cell_items, sizeof(cell_items) / ISIZE, atts, "<TD>");
00500 
00501     return cell;
00502 }
00503 
00504 static htmltbl_t *mkTbl(char **atts)
00505 {
00506     htmltbl_t *tbl = NEW(htmltbl_t);
00507 
00508     tbl->rc = -1;               /* flag that table is a raw, parsed table */
00509     tbl->cb = -1;               /* unset cell border attribute */
00510     doAttrs(tbl, tbl_items, sizeof(tbl_items) / ISIZE, atts, "<TABLE>");
00511 
00512     return tbl;
00513 }
00514 
00515 static void startElement(void *user, const char *name, char **atts)
00516 {
00517     if (strcasecmp(name, "TABLE") == 0) {
00518         htmllval.tbl = mkTbl(atts);
00519         state.inCell = 0;
00520         state.tok = T_table;
00521     } else if ((strcasecmp(name, "TR") == 0)
00522                || (strcasecmp(name, "TH") == 0)) {
00523         state.inCell = 0;
00524         state.tok = T_row;
00525     } else if (strcasecmp(name, "TD") == 0) {
00526         state.inCell = 1;
00527         htmllval.cell = mkCell(atts);
00528         state.tok = T_cell;
00529     } else if (strcasecmp(name, "FONT") == 0) {
00530         htmllval.font = mkFont(atts);
00531         state.tok = T_font;
00532     } else if (strcasecmp(name, "BR") == 0) {
00533         mkBR(atts);
00534         state.tok = T_br;
00535     } else if (strcasecmp(name, "IMG") == 0) {
00536         htmllval.img = mkImg(atts);
00537         state.tok = T_img;
00538     } else if (strcasecmp(name, "HTML") == 0) {
00539         state.tok = T_html;
00540     } else {
00541         lexerror(name);
00542     }
00543 }
00544 
00545 static void endElement(void *user, const char *name)
00546 {
00547     if (strcasecmp(name, "TABLE") == 0) {
00548         state.tok = T_end_table;
00549         state.inCell = 1;
00550     } else if ((strcasecmp(name, "TR") == 0)
00551                || (strcasecmp(name, "TH") == 0)) {
00552         state.tok = T_end_row;
00553     } else if (strcasecmp(name, "TD") == 0) {
00554         state.tok = T_end_cell;
00555         state.inCell = 0;
00556     } else if (strcasecmp(name, "HTML") == 0) {
00557         state.tok = T_end_html;
00558     } else if (strcasecmp(name, "FONT") == 0) {
00559         state.tok = T_end_font;
00560     } else if (strcasecmp(name, "BR") == 0) {
00561         if (state.tok == T_br)
00562             state.tok = T_BR;
00563         else
00564             state.tok = T_end_br;
00565     } else if (strcasecmp(name, "IMG") == 0) {
00566         if (state.tok == T_img)
00567             state.tok = T_IMG;
00568         else
00569             state.tok = T_end_img;
00570     } else {
00571         lexerror(name);
00572     }
00573 }
00574 
00575 /* characterData:
00576  * Generate T_string token. Do this only when immediately in
00577  * <TD>..</TD> or <HTML>..</HTML>, i.e., when inCell is true.
00578  * Strip out formatting characters but keep spaces.
00579  * Distinguish between all whitespace vs. strings with non-whitespace
00580  * characters.
00581  */
00582 static void characterData(void *user, const char *s, int length)
00583 {
00584     int i, rc, cnt = 0;
00585     unsigned char c;
00586 
00587     if (state.inCell) {
00588         for (i = length; i; i--) {
00589             c = *s++;
00590             if (c >= ' ') {
00591                 cnt++;
00592                 rc = agxbputc(state.xb, c);
00593             }
00594         }
00595         if (cnt) state.tok = T_string;
00596     }
00597 }
00598 #endif
00599 
00600 int initHTMLlexer(char *src, agxbuf * xb, int charset)
00601 {
00602 #ifdef HAVE_LIBEXPAT
00603     state.xb = xb;
00604     agxbinit (&state.lb, SMALLBUF, NULL);
00605     state.ptr = src;
00606     state.mode = 0;
00607     state.warn = 0;
00608     state.error = 0;
00609     state.currtoklen = 0;
00610     state.prevtoklen = 0;
00611     state.inCell = 1;
00612     state.parser = XML_ParserCreate(charsetToStr(charset));
00613     XML_SetElementHandler(state.parser,
00614                           (XML_StartElementHandler) startElement,
00615                           endElement);
00616     XML_SetCharacterDataHandler(state.parser, characterData);
00617     return 0;
00618 #else
00619     static int first;
00620     if (!first) {
00621         agerr(AGWARN,
00622               "Not built with libexpat. Table formatting is not available.\n");
00623         first++;
00624     }
00625     return 1;
00626 #endif
00627 }
00628 
/* clearHTMLlexer:
 * Release the parser and the translation buffer.
 * Returns non-zero if any warning or error was recorded while lexing;
 * always returns 1 when built without libexpat.
 */
int clearHTMLlexer()
{
#ifndef HAVE_LIBEXPAT
    return 1;
#else
    int status = state.warn | state.error;

    XML_ParserFree(state.parser);
    agxbfree (&state.lb);
    return status;
#endif
}
00640 
00641 #ifdef HAVE_LIBEXPAT
00642 /* eatComment:
00643  * Given first character after open comment, eat characters
00644  * upto comment close, returning pointer to closing > if it exists,
00645  * or null character otherwise.
00646  * We rely on HTML strings having matched nested <>.
00647  */
00648 static char *eatComment(char *p)
00649 {
00650     int depth = 1;
00651     char *s = p;
00652     char c;
00653 
00654     while (depth && (c = *s++)) {
00655         if (c == '<')
00656             depth++;
00657         else if (c == '>')
00658             depth--;
00659     }
00660     s--;                        /* move back to '\0' or '>' */
00661     if (*s) {
00662         char *t = s - 2;
00663         if ((t < p) || strncmp(t, "--", 2)) {
00664             agerr(AGWARN, "Unclosed comment\n");
00665             state.warn = 1;
00666         }
00667     }
00668     return s;
00669 }
00670 
00671 /* findNext:
00672  * Return next XML unit. This is either <..>, an HTML 
00673  * comment <!-- ... -->, or characters up to next <.
00674  */
00675 static char *findNext(char *s, agxbuf* xb)
00676 {
00677     char* t = s + 1;
00678     char c;
00679     int rc;
00680 
00681     if (*s == '<') {
00682         if ((*t == '!') && !strncmp(t + 1, "--", 2))
00683             t = eatComment(t + 3);
00684         else
00685             while (*t && (*t != '>'))
00686                 t++;
00687         if (*t != '>') {
00688             agerr(AGWARN, "Label closed before end of HTML element\n");
00689             state.warn = 1;
00690         } else
00691             t++;
00692     } else {
00693         rc = agxbputc(xb, *s);
00694         while ((c = *t) && (c != '<')) {
00695             if ((c == '&') && (*(t+1) != '#')) {
00696                 t = scanEntity(t + 1, xb);
00697             }
00698             else {
00699                 rc = agxbputc(xb, c);
00700                 t++;
00701             }
00702         }
00703     }
00704     return t;
00705 }
00706 #endif
00707 
/* htmllineno:
 * Current line number as reported by the XML parser;
 * 0 when built without libexpat.
 */
int htmllineno()
{
#ifndef HAVE_LIBEXPAT
    return 0;
#else
    return XML_GetCurrentLineNumber(state.parser);
#endif
}
00716 
00717 #ifdef DEBUG
00718 static void printTok(int tok)
00719 {
00720     char *s;
00721 
00722     switch (tok) {
00723     case T_BR:
00724         s = "T_BR";
00725         break;
00726     case T_br:
00727         s = "T_br";
00728         break;
00729     case T_end_br:
00730         s = "T_end_br";
00731         break;
00732     case T_end_table:
00733         s = "T_end_table";
00734         break;
00735     case T_row:
00736         s = "T_row";
00737         break;
00738     case T_end_row:
00739         s = "T_end_row";
00740         break;
00741     case T_end_cell:
00742         s = "T_end_cell";
00743         break;
00744     case T_html:
00745         s = "T_html";
00746         break;
00747     case T_end_html:
00748         s = "T_end_html";
00749         break;
00750     case T_string:
00751         s = "T_string";
00752         break;
00753     case T_error:
00754         s = "T_error";
00755         break;
00756     case T_table:
00757         s = "T_table";
00758         break;
00759     case T_cell:
00760         s = "T_cell";
00761         break;
00762     case T_img:
00763         s = "T_img";
00764         break;
00765     case T_end_img:
00766         s = "T_end_img";
00767         break;
00768     case T_IMG:
00769         s = "T_IMG";
00770         break;
00771     default:
00772         s = "<unknown>";
00773     }
00774     if (tok == T_string) {
00775         fprintf(stderr, "%s \"", s);
00776         fwrite(agxbstart(state.xb), 1, agxblen(state.xb), stderr);
00777         fprintf(stderr, "\"\n");
00778     } else
00779         fprintf(stderr, "%s\n", s);
00780 }
00781 
00782 #endif
00783 
/* htmllex:
 * Return the next token for the parser, or EOF when input is done
 * (always EOF when built without libexpat).
 *
 * The label is wrapped in an artificial <HTML>..</HTML> pair, tracked
 * by state.mode:
 *   0 - nothing fed yet; feed "<HTML>" and move to mode 1
 *   1 - feed the label one unit at a time (see findNext); when the
 *       text is exhausted feed "</HTML>" and move to mode 2
 *   2 - closing tag already fed; return EOF
 * Units are fed to expat until one of its callbacks sets state.tok.
 * When findNext left translated text in state.lb, that text is parsed
 * in place of the raw unit.
 */
int htmllex()
{
#ifdef HAVE_LIBEXPAT
    static char *begin_html = "<HTML>";
    static char *end_html = "</HTML>";

    char *chunk;
    char *next = 0;
    int chunklen, xlen;
    int status;

    state.tok = 0;
    do {
        if (state.mode == 2)
            return EOF;

        if (state.mode == 0) {
            state.mode = 1;
            chunk = begin_html;
            chunklen = strlen(chunk);
            next = 0;
        } else if (*state.ptr == '\0') {
            state.mode = 2;
            chunk = end_html;
            chunklen = strlen(chunk);
        } else {
            chunk = state.ptr;
            next = findNext(chunk, &state.lb);
            chunklen = next - chunk;
        }

        /* remember the last two chunks for error_context */
        state.prevtok = state.currtok;
        state.prevtoklen = state.currtoklen;
        state.currtok = chunk;
        state.currtoklen = chunklen;

        xlen = agxblen(&state.lb);
        if (xlen)
            status = XML_Parse(state.parser, agxbuse(&state.lb), xlen, 0);
        else
            status = XML_Parse(state.parser, chunk, chunklen,
                               (chunklen ? 0 : 1));

        if ((status == XML_STATUS_ERROR) && !state.error) {
            agerr(AGERR, "%s in line %d \n",
                  XML_ErrorString(XML_GetErrorCode(state.parser)),
                  htmllineno());
            error_context();
            state.error = 1;
            state.tok = T_error;
        }
        if (next)
            state.ptr = next;
    } while (state.tok == 0);
    return state.tok;
#else
    return EOF;
#endif
}

Generated on Mon Mar 31 19:03:24 2008 for Graphviz by  doxygen 1.5.1