/*b
 * Copyright (C) 2001,2002  Rick Richardson
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Author: Rick Richardson <rickr@mn.rr.com>
b*/

/*
 * This is a simple byte-stream lexical analyzer for SGML.  It produces only
 * two tokens: tag or text, which are returned via a callback function.
 *
 * See sgml.h for documentation.
 *
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "debug.h"
#include "sgml.h"

/*
 * Lexer state.  One instance is created by sgml_lexer_new() and fed a
 * character at a time through sgml_lexer_putc().
 */
struct sgml_lexer
{
    /*
     * Current scanner state:
     *   'h'  collecting HTTP header lines (set by sgml_lexer_http)
     *   0    discarding input until the first '<' (set by sgml_lexer_reset)
     *   '<'  collecting the inside of a tag
     *   't'  collecting text between tags
     */
    int		state;

    char	*buf;		// token buffer, malloc'ed in sgml_lexer_new
    int		bufsize;	// size of buf in bytes
    char	*bufp;		// next write position within buf
    char	*bufe;		// buf + bufsize - 1; reaching it flushes a truncated token
    int		eof;		// non-zero once EOF has been fed to the lexer
};

/*
 * Rewind the lexer and put it in HTTP-header mode ('h'), so that the
 * following input is scanned as header lines before any SGML.
 * Anything currently buffered is discarded and the EOF flag is cleared.
 */
void
sgml_lexer_http(SGML_LEXER *lp)
{
    lp->bufp = lp->buf;		// drop any partially collected token
    lp->eof = 0;
    lp->state = 'h';
}

/*
 * Rewind the lexer to its initial SGML state (0), in which input is
 * skipped until the first '<'.  Anything currently buffered is
 * discarded and the EOF flag is cleared.
 */
void
sgml_lexer_reset(SGML_LEXER *lp)
{
    lp->bufp = lp->buf;		// drop any partially collected token
    lp->eof = 0;
    lp->state = 0;
}

/*
 * Allocate a lexer with a token buffer of bufsize bytes.
 *
 * bufsize == 0 selects the default of 4096 bytes.  A negative bufsize is
 * rejected.  A bufsize of 1 is raised to 2: the last byte of the buffer
 * is reserved for the terminating NUL, and sgml_lexer_putc() detects a
 * full buffer only after storing at least one character, so a 1-byte
 * buffer would be overrun.
 *
 * Returns the new lexer, reset to its initial state, or NULL if bufsize
 * is negative or memory cannot be allocated.  Release the result with
 * sgml_lexer_destroy().
 */
SGML_LEXER *
sgml_lexer_new(int bufsize)
{
    SGML_LEXER	*lp;

    if (bufsize < 0)
	return NULL;
    if (bufsize == 0)
	bufsize = 4096;
    else if (bufsize < 2)
	bufsize = 2;

    lp = malloc(sizeof *lp);
    if (!lp)
	return NULL;
    lp->buf = malloc(bufsize);
    if (!lp->buf)
    {
	free(lp);
	return NULL;
    }
    lp->bufsize = bufsize;
    // Last byte is kept free for the NUL written when a token is flushed
    lp->bufe = lp->buf + lp->bufsize - 1;
    // Sets state, eof and bufp
    sgml_lexer_reset(lp);
    return lp;
}

/*
 * Free a lexer and its token buffer.  Passing NULL is allowed and does
 * nothing.
 */
void
sgml_lexer_destroy(SGML_LEXER *lp)
{
    if (lp)
    {
	// free(NULL) is a no-op, so buf needs no separate check
	free(lp->buf);
	free(lp);
    }
}

/*
 * Feed one character (or EOF) to the lexer.
 *
 *   lp     lexer created by sgml_lexer_new()
 *   c      next input character, or EOF to mark the end of input
 *   cb     callback invoked as (*cb)(cbarg, token, string) whenever a
 *          token is complete; may be NULL, in which case tokens are dropped
 *   cbarg  opaque pointer handed back to cb
 *
 * The string passed to cb is the lexer's internal buffer; it is
 * overwritten by later input.  It is NULL for SGML_LEXER_HTTP_END.
 * When the buffer fills up before a token ends, what has been collected
 * so far is delivered with the corresponding *_TRUNC token and
 * collection continues with an empty buffer.
 *
 * Returns SGML_LEXER_EOF for the call that delivers EOF and for every
 * call after it (until the lexer is reset), SGML_LEXER_CONT otherwise.
 * On EOF, pending text is flushed as SGML_LEXER_TEXT; a partial tag or
 * partial HTTP header line is discarded.
 */
SGML_LEXER_RC
sgml_lexer_putc(SGML_LEXER *lp, int c, SGML_LEXER_CB cb, void *cbarg)
{
    if (lp->eof)
	return SGML_LEXER_EOF;

    if (c == EOF)
	lp->eof = 1;

    switch (lp->state)
    {
    case 'h':
	    // Parse HTTP first
	    if (c == EOF)
	    {
		// EOF is not a character: never store it in the buffer
		return SGML_LEXER_EOF;
	    }
	    else if (c == '\r')
		break;
	    else if (c == '\n')
	    {
		if (lp->bufp == lp->buf)
		{
		    // Empty line: end of the headers, SGML follows
		    lp->state = 0;
		    debug(5, "HTTP END\n");
		    if (cb)
			(*cb)(cbarg, SGML_LEXER_HTTP_END, NULL);
		}
		else
		{
		    *lp->bufp = 0;
		    debug(5, "HTTP: <%s>\n", lp->buf);
		    if (cb)
			(*cb)(cbarg, SGML_LEXER_HTTP, lp->buf);
		    lp->bufp = lp->buf;
		    return SGML_LEXER_CONT;
		}
	    }
	    else
	    {
		*lp->bufp++ = c;
		if (lp->bufp == lp->bufe)
		{
		    // Buffer full: hand over what we have and start again
		    *lp->bufp = 0;
		    debug(5, "Trunc HTTP: <%s>\n", lp->buf);
		    if (cb)
			(*cb)(cbarg, SGML_LEXER_HTTP_TRUNC, lp->buf);
		    lp->bufp = lp->buf;
		    return SGML_LEXER_CONT;
		}
	    }
	    break;
    case 0:
	    // Eat chars until first tag
	    if (c == EOF)
		return SGML_LEXER_EOF;
	    if (c == '<')
	    {
		lp->state = '<';
		lp->bufp = lp->buf;
	    }
	    break;
    case '<':
	    if (c == EOF)
		return SGML_LEXER_EOF;
	    else if (c == '>')
	    {
		*lp->bufp = 0;
		debug(5, "Tag: <%s>\n", lp->buf);
		if (cb)
		    (*cb)(cbarg, SGML_LEXER_TAG, lp->buf);
		lp->state = 't';
		lp->bufp = lp->buf;
		return SGML_LEXER_CONT;
	    }
	    else
	    {
		// Line breaks inside a tag are folded to blanks
		if (c == '\r' || c == '\n')
		    c = ' ';
		*lp->bufp++ = c;
		if (lp->bufp == lp->bufe)
		{
		    *lp->bufp = 0;
		    debug(5, "Trunc Tag: <%s>\n", lp->buf);
		    if (cb)
			(*cb)(cbarg, SGML_LEXER_TAG_TRUNC, lp->buf);
		    lp->bufp = lp->buf;
		    return SGML_LEXER_CONT;
		}
	    }
	    break;
    case 't':
	    if (c == '<' || c == EOF)
	    {
		// Text ends at the next tag or at end of input
		lp->state = '<';
		if (lp->bufp != lp->buf)
		{
		    *lp->bufp = 0;
		    debug(5, "Text: <%s>\n", lp->buf);
		    if (cb)
			(*cb)(cbarg, SGML_LEXER_TEXT, lp->buf);
		    lp->bufp = lp->buf;
		}
		return (c == EOF) ? SGML_LEXER_EOF : SGML_LEXER_CONT;
	    }
	    else
	    {
		*lp->bufp++ = c;
		if (lp->bufp == lp->bufe)
		{
		    *lp->bufp = 0;
		    debug(5, "Trunc Text: <%s>\n", lp->buf);
		    if (cb)
			(*cb)(cbarg, SGML_LEXER_TEXT_TRUNC, lp->buf);
		    lp->bufp = lp->buf;
		    return SGML_LEXER_CONT;
		}
	    }
	    break;
    default:
	    // Unknown state: refuse further input
	    lp->eof = 1;
	    return SGML_LEXER_EOF;
    }
    return SGML_LEXER_CONT;
}

/* vim:set sw=4: */
