/* htmlmodify.l - part of ziproxy package
 * Copyright (c)2002 Juraj Variny<variny@naex.sk>
 *
 * Released subject to GNU General Public License v2 or later version.
 *
 * HTML modification, text compression functions
 */
%{
#include <string.h>
#include <stdlib.h>
#include <unistd.h> //for off_t
#include <assert.h>
#include <stdio.h>
#include <zlib.h>

#include "text.h"

/* Text inserted right after the ".gif"/".png" extension of a rewritten image URL. */
const char suff[] = "_or_jpg";

/*
 * Output buffer bookkeeping shared by the scanner actions and htmlmodify():
 *   n       - number of bytes currently stored in *outbuf
 *   outsize - allocated size of *outbuf in bytes
 */
static int n, outsize;

/*
 * Signature of the generated scanner function. The three parameters are
 * referenced by name from inside MY_YY_OUTPUT.
 */
#define YY_DECL static int htmlmodify_yylex(char ** outbuf, gzFile gzfile, int out_type)

/* Size of the output buffer htmlmodify() allocates for every out_type except ONormal. */
#define CHUNKSIZE 4050

/*
 * MY_YY_OUTPUT(buf, size) - append `size` bytes starting at `buf` to the
 * scanner output.
 *
 * Must be expanded inside the scanner function: it uses the YY_DECL
 * parameters `outbuf`, `gzfile` and `out_type` as well as the file-level
 * counters `n` (bytes buffered) and `outsize` (buffer capacity).
 *
 * While the data fits, it is only copied into *outbuf. The buffer is kept
 * strictly larger than `n` so that one spare byte is always available for a
 * terminating NUL. When the data does not fit, the action depends on
 * out_type:
 *   OChunked    - buffered bytes plus the new bytes are written to stdout as
 *                 one HTTP chunk (hex length, CRLF, data, CRLF)
 *   OStream     - buffered bytes plus the new bytes are written raw to stdout
 *   OGzipStream - buffered bytes plus the new bytes are written to gzfile
 *   ONormal     - the buffer is replaced by a larger one holding everything
 * In the three streaming modes the buffer is empty afterwards.
 *
 * If the larger buffer cannot be allocated in ONormal mode, the macro makes
 * the scanner function return 1; *outbuf and n are left untouched.
 */
#define MY_YY_OUTPUT(buf, size) \
do {\
	const char *yo_src = (buf);\
	int yo_len = (int)(size);\
	if ((yo_len == 1) && n + 1 < outsize){\
		(*outbuf)[n] = *yo_src;\
		n++;\
	}else if(outsize > yo_len + n) {\
		memcpy(*outbuf + n, yo_src, yo_len);\
		n += yo_len;\
	}else switch(out_type){\
		case OChunked: \
			/* chunk header: total payload length in hex */\
			fprintf(stdout, "%X\r\n", (unsigned int)(yo_len + n));\
			/* fall through: the payload is written like a raw stream */\
		case OStream:\
			fwrite(*outbuf, n, 1, stdout);\
			fwrite(yo_src, yo_len, 1, stdout);\
			if (out_type == OChunked) fputs("\r\n", stdout);\
			n = 0;\
			break;\
		case ONormal: {\
			/* +1 keeps a spare byte for the final NUL even when the */\
			/* 10% head-room rounds down to nothing */\
			int yo_newsize = (int)((yo_len + n) * 1.1) + 1;\
			char *yo_newbuf = malloc(yo_newsize);\
			if (yo_newbuf == NULL)\
				return 1;\
			memcpy(yo_newbuf, *outbuf, n);\
			memcpy(yo_newbuf + n, yo_src, yo_len);\
			n += yo_len;\
			outsize = yo_newsize;\
			free(*outbuf);\
			*outbuf = yo_newbuf;\
			break;\
			}\
		case OGzipStream: \
			gzwrite(gzfile, *outbuf, n);\
			gzwrite(gzfile, yo_src, yo_len);\
			n = 0;\
			break;\
		default:\
			/* negated so the assertion actually fails */\
			assert(!"Invalid out_type parameter to htmlmodify()!\n");\
			break;\
	}\
} while (0)

/* Copy the text matched by the current rule to the output unchanged. */
#define DEFAULT_OUT MY_YY_OUTPUT(yytext, yyleng)

%}
/* Generated symbols are named htmlmodify_yy... instead of yy... */
%option prefix="htmlmodify_yy"
%option never-interactive
%option noyywrap
%option nounput
/* Patterns match regardless of letter case. */
%option caseless

/* Inclusive start condition: active after the opening of one of the tags listed in the first rule. */
%s TAG

/* Exclusive start conditions for an attribute value that is unquoted, single-quoted or double-quoted. */
%x IMG_NAME
%x IMG_NAMESQ
%x IMG_NAMEDQ

/* Attribute name whose value is inspected, followed by '=' with optional whitespace on either side. */
IMG (href|src|background)[[:space:]]*=[[:space:]]*
/* File extensions that get the extra suffix appended. */
SUFF (\.gif|\.png)

%%
\<img[[:space:]]+	|
\<body[[:space:]]+ 	|
\<a[[:space:]]+		|
\<input[[:space:]]+	|
\<table[[:space:]]+	|
\<thead[[:space:]]+	|
\<tbody[[:space:]]+	|
\<tfoot[[:space:]]+	|
\<td[[:space:]]+	|
\<tr[[:space:]]+	{
		/* Opening of a tag whose attributes are inspected: copy it and start looking for attributes. */
		BEGIN(TAG);
		DEFAULT_OUT;
		}
		
<TAG>\> 	{
		/* End of the tag: back to plain copying. */
		BEGIN(INITIAL);
		DEFAULT_OUT;
		}

<TAG>{IMG}\'	{ 
		/* href/src/background attribute with a single-quoted value. */
		BEGIN(IMG_NAMESQ);
		DEFAULT_OUT;
		}
<TAG>{IMG}\"	{
		/* href/src/background attribute with a double-quoted value. */
		BEGIN(IMG_NAMEDQ);
		DEFAULT_OUT;
		}

<TAG>{IMG}	{
		/* href/src/background attribute with an unquoted value. */
		BEGIN(IMG_NAME);
		DEFAULT_OUT;
		}
 
<IMG_NAMESQ>[^']+{SUFF}[[:space:]]*'	|
<IMG_NAMEDQ>[^\"]+{SUFF}[[:space:]]*\"	|
<IMG_NAME>[^[:space:]>]+{SUFF}[[:space:]]*[[:space:]>] {
		/* Attribute value ending in .gif or .png.
		 * myleng is the length of the value up to and including the
		 * 4-character extension that starts at the last '.' of the match. */
		int myleng = strrchr(yytext, '.') - &(yytext[0]) + 4; 
		// output the image URL without any trailing whitespace
		MY_YY_OUTPUT(yytext, myleng); 
		// append the suffix "_or_jpg"
		MY_YY_OUTPUT(suff, (sizeof(suff) - 1));
		// output the terminating character (closing quote, whitespace or '>')
		MY_YY_OUTPUT(&yytext[yyleng - 1], 1);
		/*fprintf(stderr, "Image: %s\n",yytext);*/
		/* NOTE(review): when an unquoted value is terminated by '>' the tag
		 * has already ended, yet the scanner goes to TAG rather than
		 * INITIAL and stays there until the next '>' - confirm this is
		 * intended. */
		BEGIN(TAG);
		}

<IMG_NAMESQ>[^']+	|
<IMG_NAMESQ>\'		|
<IMG_NAMEDQ>[^\"]+	|
<IMG_NAMEDQ>\"		|
<IMG_NAME>[^[:space:]>]+ |
<IMG_NAME>[[:space:]>] {
		/* Any other attribute value (or its terminator): copy unchanged and
		 * continue scanning the tag.
		 * NOTE(review): as above, a terminating '>' also leads to TAG, not
		 * INITIAL. */
		DEFAULT_OUT;
		BEGIN(TAG);
		}
 
\<[^[:space:]<]+[[:space:]]*	|
[^<=>'\"[:space:]]+[[:space:]]*	|

[\0-\xff]	DEFAULT_OUT; /* everything else, down to single bytes, is copied unchanged */

<IMG_NAMESQ,IMG_NAMEDQ,IMG_NAME><<EOF>>	|
<<EOF>>			yyterminate(); /* end of input, also inside an unfinished attribute value */

%%

#undef DEFAULT_OUT
#undef MY_YY_OUTPUT

/*
 * htmlmodify - run the HTML scanner over a NUL-terminated document and
 * deliver the rewritten text.
 *
 * Parameters:
 *   inbuf    - heap-allocated, NUL-terminated input text. It is freed by
 *              this function once scanning has been attempted (return
 *              values 0 and 1). It is NOT freed when 2 is returned.
 *   outbuf   - receives the result buffer. For ONormal it points to a
 *              heap-allocated, NUL-terminated string owned by the caller.
 *              For every other out_type the working buffer is released
 *              before returning and *outbuf is set to NULL.
 *   out_type - ONormal:     result is only stored in *outbuf
 *              OStream:     result is written raw to stdout
 *              OChunked:    result is written to stdout as HTTP chunks,
 *                           followed by the zero-length chunk line "0\r\n"
 *              OGzipStream: result is gzip-compressed onto a duplicate of
 *                           file descriptor 1
 *
 * Returns:
 *   0 - success
 *   1 - the scanner reported a failure; output produced so far has been
 *       finalised as described above
 *   2 - set-up failed (output buffer could not be allocated, or the gzip
 *       stream could not be opened); nothing was written, *outbuf is NULL
 *
 * NOTE(review): the chunked output ends with "0\r\n" exactly as before; the
 * blank line that terminates a chunked body is not written here - presumably
 * the caller does that, confirm.
 */
int htmlmodify(char* inbuf, char** outbuf, int out_type)
{
	gzFile gzfile =(gzFile)0;
	YY_BUFFER_STATE scan_buf;
	int lex_rc;

	/* n and the start condition are file-level state; reset them so that
	 * a previous call cannot influence this one. */
	n = 0;
	BEGIN(INITIAL);

	/* +1: (*outbuf)[n] must be writable for the final NUL even when the
	 * input is empty or too short for the 10% head-room to add a byte. */
	if(out_type == ONormal)	outsize = (int)(strlen(inbuf) * 1.1) + 1;
	else outsize = CHUNKSIZE;

	*outbuf = malloc(outsize);
	if(*outbuf == NULL) return 2;

	if(out_type == OGzipStream)
	{
		int gz_fd = dup(1);

		if(gz_fd >= 0) gzfile = gzdopen(gz_fd, "wb9");
		if(gzfile == Z_NULL)
		{
			/* gzdopen() does not take over the descriptor on failure */
			if(gz_fd >= 0) close(gz_fd);
			free(*outbuf);
			*outbuf = NULL;
			return 2;
		}
	}

	scan_buf = htmlmodify_yy_scan_string(inbuf);

	lex_rc = htmlmodify_yylex(outbuf, gzfile, out_type);

	/* the scanner works on its own copy of the text */
	htmlmodify_yy_delete_buffer(scan_buf);

	free(inbuf);

	/* deliver whatever is still buffered */
	switch(out_type){
	case OChunked:
		if(n > 0)
		{
			printf("%X\r\n", (unsigned int)n);
			fwrite(*outbuf, n, 1, stdout);
			/* CRLF closing this chunk's data; written only when a
			 * chunk was actually emitted */
			fputs("\r\n", stdout);
		}
		fputs("0\r\n", stdout);
		break;
	case ONormal:
		(*outbuf)[n]= '\0';
		break;
	case OGzipStream:
		if(n > 0)
			gzwrite(gzfile, *outbuf, n);

		gzclose(gzfile);
		break;
	case OStream:
		if (n > 0)
			fwrite(*outbuf, n, 1, stdout);
		break;
	default:
		break;
	}

	/* Only ONormal hands the buffer to the caller. In the other modes it
	 * was scratch space; release it and leave no dangling pointer. */
	if(out_type != ONormal)
	{
		free(*outbuf);
		*outbuf = NULL;
		n = 0;
	}

	return lex_rc != 0 ? 1 : 0;
}
