/*
 *  SingIt Lyrics Displayer
 *  Copyright (C) 2000 - 2003 Jan-Marek Glogowski <glogow@stud.fbi.fh-darmstadt.de>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */


#include "editor_query_parser.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>

#ifdef HAVE_LIBXML2
#include <libxml/HTMLparser.h>
#endif

// Memory helper macros. Each one expands to a single statement
// (do { ... } while (0)), so it is safe inside an unbraced if/else and
// must be followed by a semicolon. g_free() ignores NULL, so no explicit
// NULL test is needed.

// Release the memory `var` points to; `var` itself is left unchanged.
#define FREE(var) \
	do { g_free(var); } while (0)

// Release the memory `var` points to and reset `var` to NULL.
#define FREE_N(var) \
	do { g_free(var); (var) = NULL; } while (0)

// Release the old value of `var`, then store `val` in it.
// `val` is evaluated after the old value was freed.
#define SET(var, val) \
	do { g_free(var); (var) = (val); } while (0)

// Little struct to pass to functions instead of three vars
typedef struct _BufferHandle {

	const gchar *buf; // Input data (used as a file name when parsing a file)
	gint         len; // Number of usable bytes in buf
	gint         pos; // Current read offset into buf
}
BufferHandle;

/****************************
 *
 * Lyrics struct handling functions
 *
 ****************************/

// Put *lyr into its empty state: id 0 and all pointers NULL.
// Nothing is freed here, previous values are simply overwritten.
static void lyrics_init(Lyrics *lyr)
{
	lyr->db_id = 0;

	lyr->text = NULL;
	lyr->modified = NULL;
	lyr->author = NULL;
	lyr->title = NULL;
	lyr->album = NULL;
	lyr->artist = NULL;
}

// Allocate a Lyrics struct and return it in the empty state
// (or NULL if the allocation yields NULL).
static Lyrics* lyrics_new(void)
{
	Lyrics *fresh = g_new(Lyrics, 1);

	if (fresh == NULL)
		{ return NULL; }

	lyrics_init(fresh);
	return fresh;
}

// Release all strings and the text vector owned by *lyr.
// The struct itself is released too when compl is TRUE.
// A NULL lyr is ignored.
static void lyrics_free_real(Lyrics *lyr, gboolean compl)
{
	if (lyr != NULL) {
		// g_free() is a no-op for NULL members
		g_free(lyr->artist);
		g_free(lyr->album);
		g_free(lyr->title);
		g_free(lyr->author);
		g_free(lyr->modified);

		if (lyr->text != NULL)
			{ g_strfreev ((gchar**) lyr->text); }

		if (compl == TRUE)
			{ g_free(lyr); }
	}
}

// Release a Lyrics entry completely: its members and the struct itself.
static void lyrics_free(Lyrics *lyr)
{
	const gboolean release_struct = TRUE;

	lyrics_free_real(lyr, release_struct);
}

#if 0
// Currently compiled out. Releases everything the entry owns and puts it
// back into the empty state; the struct itself stays allocated.
static void lyrics_clear(Lyrics *lyr)
{
	if (lyr == NULL)
		{ return; }

	// FALSE: free the members only, keep the struct
	lyrics_free_real(lyr, FALSE);
	lyrics_init(lyr);
}
#endif

// Shallow copy: every field of *to is set to the value found in *from.
// Pointers are copied, not duplicated, so both structs reference the
// same strings afterwards.
static void lyrics_refcopy(Lyrics *from, Lyrics *to)
{
	g_return_if_fail(from != NULL);
	g_return_if_fail(to != NULL);

	to->db_id = from->db_id;

	to->artist = from->artist;
	to->album = from->album;
	to->title = from->title;

	to->author = from->author;
	to->modified = from->modified;

	to->text = from->text;
}

LyrixQueryResult* lyrix_query_result_new(void)
{
	LyrixQueryResult* result = g_new(LyrixQueryResult, 1);

	if (result != NULL) {
		result->result_type = RES_UNDEF;

		result->list.entries = 0;
		result->list.lyrics = NULL;
		result->list.pages = 0;

		result->forward.uri = NULL;

		result->lyrics.lyric_data.db_id = 0;
		result->lyrics.lyric_data.artist = NULL;
		result->lyrics.lyric_data.album = NULL;
		result->lyrics.lyric_data.title = NULL;
		result->lyrics.lyric_data.author = NULL;
		result->lyrics.lyric_data.modified = NULL;
		result->lyrics.lyric_data.text = NULL;

		result->lookup_dur = 0;
		result->status_code = 0;
	}

	return result;
}

// Release every Lyrics entry of the result list and then the list nodes.
// The list pointer stored in res is not reset.
static inline void lyrix_query_result_free_entries(LyrixQueryResult *res)
{
	GList *node;

	for (node = res->list.lyrics; node != NULL; node = g_list_next(node))
		{ lyrics_free((Lyrics*) node->data); }

	g_list_free(res->list.lyrics);
}

// g_list_foreach() callback: release the Lyrics entry stored in a node.
// user_data is not used.
static void free_glist_lyrics(gpointer data, gpointer user_data)
{
	Lyrics *entry = (Lyrics*) data;

	lyrics_free(entry);
}

// Release a query result: the forward uri, every entry of the lyrics
// list, the list itself and finally the result struct.
// A NULL res is ignored.
void lyrix_query_result_free(LyrixQueryResult *res)
{
	GList *node;

	if (res == NULL)
		{ return; }

	// g_free() ignores NULL
	g_free(res->forward.uri);

	for (node = res->list.lyrics; node != NULL; node = g_list_next(node))
		{ lyrics_free((Lyrics*) node->data); }
	g_list_free(res->list.lyrics);

	g_free(res);
}

// Comparison callback for g_list_find_custom(): 0 when both entries
// carry the same db_id, -1 otherwise (no ordering is implied).
static gint lyric_db_cmp (gconstpointer a, gconstpointer b)
{
	const Lyrics *lhs = (const Lyrics*) a;
	const Lyrics *rhs = (const Lyrics*) b;

	if (lhs->db_id == rhs->db_id)
		{ return 0; }

	return -1;
}

#ifdef CODEDEBUG
// Debug helper: print id, artist, album and title of every list entry.
static void print_results(LyrixQueryResult *res)
{
	GList *node;

	g_return_if_fail(res != NULL);

	for (node = res->list.lyrics; node != NULL; node = g_list_next(node)) {
		Lyrics *entry = (Lyrics*) node->data;

		g_print("ID: %i\n", entry->db_id);
		g_print("  %s / %s / %s\n", entry->artist, entry->album, entry->title);
		g_print("======= \n");
	}
}
#else
// Without CODEDEBUG the call expands to nothing
#define print_results(res)
#endif

/****************************
 *
 * Old XML result parsing functions
 *
 ****************************/

#ifdef HAVE_LIBXML2
static void parse_result_tree(LyrixQueryResult *res, htmlDocPtr doc, htmlNodePtr cur)
{
	static Lyrics *last_lyrics = NULL;
	gboolean seek_children = FALSE;

	if (!xmlStrcmp(cur->name, (const xmlChar *) "div")) {
		if (!xmlStrcmp(xmlGetProp(cur, "id"), "nb")) {
			res->list.entries = atoi
				(xmlNodeListGetString(doc, cur->xmlChildrenNode, 1));
		}
		else if (!xmlStrncmp(xmlGetProp(cur, "id"), "artist", 6)) {
			last_lyrics = lyrics_new();
			if (last_lyrics != NULL) {
				last_lyrics->artist = g_strdup
					(xmlNodeListGetString(doc, cur->xmlChildrenNode, 1));
				res->list.lyrics =
					g_list_append(res->list.lyrics, last_lyrics);
			}
		}
		else { seek_children = TRUE; }

		if (last_lyrics != NULL) {
			if (!xmlStrncmp(xmlGetProp(cur, "id"), "album", 5)) {
				last_lyrics->album =
					g_strdup(xmlNodeListGetString(doc, cur->xmlChildrenNode, 1));
			}
			else if (!xmlStrncmp(xmlGetProp(cur, "id"), "title", 5)) {
				last_lyrics->title =
					g_strdup(xmlNodeListGetString(doc, cur->xmlChildrenNode, 1));
			}
			else if (!xmlStrncmp(xmlGetProp(cur, "id"), "id", 2)) {
				last_lyrics->db_id =
					atoi(xmlNodeListGetString(doc, cur->xmlChildrenNode, 1));
			}
			else { seek_children = TRUE; }
		}
	}
	else { seek_children = TRUE; }

	if ((cur->xmlChildrenNode != NULL) && (seek_children == TRUE))
		parse_result_tree(res, doc, cur->xmlChildrenNode);

	if (cur->next != NULL)
		parse_result_tree(res, doc, cur->next);
}

// Normalize HTML whitespace in place:
//  - leading blanks, CR and LF are dropped
//  - every run of blanks / CR / LF inside the text becomes one blank
//  - a trailing blank is dropped
// The result is never longer than the input. A NULL string is rejected.
static void remove_multi_html_whitespace(gchar* string)
{
	const gchar *src;
	gchar *dst;

	g_return_if_fail(string != NULL);

	src = dst = string;

	// Strip all preceding whitespaces
	while ((src[0] == 13) || (src[0] == 10) || (src[0] == ' '))
		{ src++; }

	for (; src[0] != '\0'; src++) {
		if ((src[0] == 13) || (src[0] == 10) || (src[0] == ' ')) {
			// Emit one blank unless the previous output character
			// already is one. dst > string always holds here, since
			// the first copied character is never a whitespace.
			if ((dst > string) && (dst[-1] != ' ')) {
				dst[0] = ' ';
				dst++;
			}
		}
		else {
			// Normally we just copy the character
			dst[0] = src[0];
			dst++;
		}
	}

	// Maybe we have a whitespace at the end? Look at the last written
	// character, not at the position the terminator goes to.
	if ((dst > string) && (dst[-1] == ' '))
		{ dst--; }

	dst[0] = '\0';
}

// Collect the lines of a lyrics node. `cur` is the first child of the
// node; consecutive "text" children are concatenated and every "br"
// child ends the current line. Finished lines are whitespace-normalized
// and prepended to *string_list (so the list ends up in reverse order),
// *lines is incremented per stored line. The list owns the strings.
// `doc` is not used.
static void parse_lyric_id
	(htmlDocPtr doc, htmlNodePtr cur, GSList **string_list, gint *lines)
{
	gchar *text = NULL;

	for (; cur != NULL; cur = cur->next) {
		if (!xmlStrcmp(cur->name, (const xmlChar *) "text")) {
			if (text == NULL) {
				text = g_strdup((const gchar*) cur->content);
			}
			else {
				gchar *joined =
					g_strconcat(text, (const gchar*) cur->content, NULL);
				g_free(text);
				text = joined;
			}
		}
		else if (!xmlStrcmp(cur->name, (const xmlChar *) "br")) {
			// A break without preceding text is an empty line
			if (text == NULL)
				{ text = g_strdup(""); }

			remove_multi_html_whitespace(text);
			(*string_list) = g_slist_prepend((*string_list), text);
			(*lines)++;
			text = NULL;
		}
	}

	// Text after the last break is the final line. Store `text` itself
	// (not a stale temporary) and drop it if only whitespace is left.
	if (text != NULL) {
		remove_multi_html_whitespace(text);
		if (text[0] != '\0') {
			(*string_list) = g_slist_prepend((*string_list), text);
			(*lines)++;
		}
		else { g_free(text); }
	}
}

static void parse_lyric_tree(LyrixQueryResult *res, htmlDocPtr doc,
	htmlNodePtr cur, gboolean first)
{
	static GSList *string_list = NULL;
	static gint lines = 0;
	static gboolean error = FALSE;
	static Lyrics *lyr = NULL;
	gboolean seek_children = FALSE;

	if (!xmlStrcmp(cur->name, (const xmlChar *) "div")) {
		if (!xmlStrcmp(xmlGetProp(cur, "id"), "nb")) {
			if (!xmlStrcmp(xmlNodeListGetString(doc, cur->xmlChildrenNode, 1), "1"))
			{
				seek_children = TRUE;
			}
			else {
				fprintf(stderr, "Wrong lyrics nr: %s. Expected 1\n",
					xmlNodeListGetString(doc, cur->xmlChildrenNode, 1));
			}
		}
		else if (!xmlStrncmp(xmlGetProp(cur, "id"), "id", 2)) {
			GList *item;
			Lyrics fake_lyr;
			fake_lyr.db_id =
				atoi(xmlNodeListGetString(doc, cur->xmlChildrenNode, 1));
			item = g_list_find_custom(res->list.lyrics, &lyr, lyric_db_cmp);
			if (item != NULL) {
				lyr = (Lyrics*) item->data;
				seek_children = TRUE;
			}
			else {
				fprintf(stderr, "Wrong lyrics id: %s. Expected %i\n",
					xmlNodeListGetString(doc, cur->xmlChildrenNode, 1),
					(lyr != NULL) ? lyr->db_id : -1);
			}
		}
		else if (!xmlStrncmp(xmlGetProp(cur, "id"), "lyrics", 6)) {
			if ((lyr->text == NULL) && (string_list == NULL)) {
				parse_lyric_id(doc, cur->xmlChildrenNode,
					&string_list, &lines);
			}
			else { fprintf(stderr, "Duplicate lyrics node\n"); }
		}
		else { seek_children = TRUE; }
	}
	else { seek_children = TRUE; }

	if ((cur->xmlChildrenNode != NULL) && (seek_children == TRUE))
		parse_lyric_tree(res, doc, cur->xmlChildrenNode, FALSE);

	if (cur->next != NULL)
		parse_lyric_tree(res, doc, cur->next, FALSE);

	if (first) {
		GSList *slist;
		xmlChar **text;

		text = g_new (xmlChar*, lines + 1);
		text[lines] = NULL;
		lines--;
		for (slist = string_list; slist; slist = slist->next) {
			text[lines] = slist->data;
			lines--;
		}

		if (error == FALSE)
			{ lyr->text = (gchar**) text; }
		else { g_strfreev((gchar**) text); }

		res->lyrics.lyric_data.text = (gchar**) text;

		g_slist_free (string_list);
	}
}

#ifdef CODEDEBUG
// Debug helper: print the node names of a tree, starting at level 0
#define print_tree(root) \
	real_print_tree(root, 0)

// Print the name of `cur` indented by `level` blanks, then its children
// (one level deeper) and its following siblings (same level).
static void real_print_tree(htmlNodePtr cur, gint level)
{
	gint i;

	for (i = 0; i < level; i++)
		g_print(" ");
	g_print("%s\n", cur->name);

	// The recursion has to call the function itself: the print_tree
	// macro takes one argument only and always restarts at level 0.
	if (cur->xmlChildrenNode != NULL)
		real_print_tree(cur->xmlChildrenNode, level + 1);

	if (cur->next != NULL)
		real_print_tree(cur->next, level);
}
#else
// Without CODEDEBUG the call expands to nothing
#define print_tree(root)
#endif

// Parse an HTML answer, either from the file named by h->buf (PT_FIL)
// or from the buffer h->buf itself (PT_BUF), as a query result list
// (PT_QUE) or as a single lyrics page (PT_LYR).
// Returns TRUE if the document could be parsed and walked. A mismatch
// between announced and found entries is only reported on stderr.
// The document is released on every path; the tree walkers keep copies
// of the strings they store.
static inline gboolean parse_xml_data
	(LyrixQueryResult* res, BufferHandle *h, ParseType type)
{
	htmlDocPtr doc = NULL;
	htmlNodePtr cur;
	gboolean result = FALSE;

	// * Build an XML tree from the file *
	if (type & PT_FIL)
		doc = htmlParseFile(h->buf, NULL);
	else if (type & PT_BUF)
		doc = htmlParseDoc((xmlChar*) h->buf, NULL);
	if (doc == NULL)
		goto cleanup;

	// * Check the document is of the right kind *
	cur = xmlDocGetRootElement(doc);
	if (cur == NULL) {
		fprintf(stderr,"empty document\n");
		goto cleanup;
	}

	if (xmlStrcmp(cur->name, (const xmlChar *) "html")) {
		fprintf(stderr,"document of the wrong type, root node != html");
		goto cleanup;
	}

	// * Now, parse the tree. *
	if (type & PT_QUE) {
		print_tree(cur);
		parse_result_tree(res, doc, cur);
	}
	else if (type & PT_LYR) {
		print_tree(cur);

		lyrics_init(&(res->lyrics.lyric_data));
		parse_lyric_tree(res, doc, cur, TRUE);
	}
	else { goto cleanup; }

	if (res->list.entries != g_list_length(res->list.lyrics)) {
		fprintf(stderr, "Wrong number of entries: %i. Expected %i\n",
			g_list_length(res->list.lyrics), res->list.entries);
	}

	result = TRUE;

cleanup:
	// Success and failure share the cleanup, so the tree never leaks
	if (doc != NULL) { xmlFreeDoc(doc); }
	return result;
}
#endif // HAVE_LIBXML2


/****************************
 *
 * New text based result parsing functions
 *
 ****************************/

// I think a maximum length of 9 is sufficient (~2^29 * 1.86)
#define MAX_UINT_STR_LEN 9

// Common checks before a fixed-width field of `length` characters is
// read at h->pos. Returns TRUE if the field can be read.
// Wrong arguments (NULL pointers, a field width outside
// 1 .. MAX_UINT_STR_LEN - 1) are programming errors and reported through
// g_return_val_if_fail. Too little remaining input depends on the
// received data, so it is an ordinary runtime check: it must not vanish
// when GLib checks are compiled out and it doesn't log a critical
// warning for a truncated answer.
static gboolean pre_parse_checks
	(BufferHandle *h, gint length, gpointer result)
{
	g_return_val_if_fail(result != NULL, FALSE);
	g_return_val_if_fail(h != NULL, FALSE);
	g_return_val_if_fail(h->buf != NULL, FALSE);
	g_return_val_if_fail(length > 0, FALSE);
	g_return_val_if_fail(length < MAX_UINT_STR_LEN, FALSE);

	if ((h->pos < 0) || ((h->len - h->pos) < length))
		{ return FALSE; }

	return TRUE;
}

// Since sscanf happily scans floating point values into int
// we need our own - more strict - function
//
// Reads exactly `length` decimal digits at h->pos. On success the value
// is stored in *result, h->pos is moved behind the field and TRUE is
// returned. If the field can't be read or contains anything but digits,
// FALSE is returned and neither h->pos nor *result is changed.
static gboolean parse_unsigned_int(BufferHandle *h, gint length, guint *result)
{
	const gchar *digits;
	guint        value = 0;
	gint         i;

	// Plain check: the remaining input depends on the received data
	if (pre_parse_checks(h, length, result) == FALSE)
		{ return FALSE; }

	digits = h->buf + h->pos;
	for (i = 0; i < length; i++) {
		// isdigit is only defined for unsigned char values and EOF
		if (!isdigit((unsigned char) digits[i]))
			{ return FALSE; }
		value = (value * 10) + (guint) (digits[i] - '0');
	}

	h->pos += length;
	(*result) = value;
	return TRUE;
}

// Reads a floating point value from the `length` characters at h->pos.
// On success the value is stored in *result, h->pos is moved behind the
// field and TRUE is returned; otherwise FALSE is returned and h->pos is
// left untouched.
// NOTE(review): sscanf honours the LC_NUMERIC locale; this presumably
// expects a '.' as decimal point - confirm against the server output.
static gboolean parse_double(BufferHandle *h, gint length, gdouble *result)
{
	gchar    atoi_buffer[MAX_UINT_STR_LEN + 1];

	// Plain checks: they guard the stack buffer below and must not
	// vanish when GLib checks are compiled out
	if (pre_parse_checks(h, length, result) == FALSE)
		{ return FALSE; }
	if ((length <= 0) || (length > MAX_UINT_STR_LEN))
		{ return FALSE; }

	memcpy(atoi_buffer, h->buf + h->pos, (size_t) length);
	atoi_buffer[length] = '\0';

	if (sscanf(atoi_buffer, "%lf", result) != 1)
		{ return FALSE; }

	h->pos += length;
	return TRUE;
}

// The length is _not_ the length of the buffer, but the length of the
// stringified length of the string
// i.e.: "018Jan-Marek Glogoski" -> len = 3
// i.e.: "5Hallo" -> len = 1
//
// Reads a length-prefixed string at h->pos.
//  skip == FALSE: returns a newly allocated copy (to be g_free'd)
//  skip == TRUE:  only moves h->pos and returns (gchar*) -1 as a
//                 non-NULL marker, which must never be dereferenced
// NULL is returned if the prefix can't be parsed, the input is shorter
// than announced or the string contains a '\0'. In the last two cases
// h->pos already points behind the length prefix.
static gchar* get_pascal_parsed_string
	(BufferHandle *h, gint length, gboolean skip)
{
	guint  str_len = 0;
	gint   remaining;
	gchar *result;

	if (parse_unsigned_int(h, length, &str_len) == FALSE)
		{ return NULL; }

	// The announced length comes from the received data, so a short
	// buffer is an input error and checked unconditionally
	remaining = h->len - h->pos;
	if ((remaining < 0) || ((guint) remaining < str_len))
		{ return NULL; }

	if (skip == TRUE) {
		h->pos += str_len;
		return (gchar*) (-1);
	}

	result = g_strndup(&(h->buf[h->pos]), str_len);

	// g_strndup stops at a '\0' and pads the rest, so a '\0' in the
	// last position means the data was shorter than announced
	if ((str_len > 0) && result[str_len - 1] == '\0') {
		g_free(result);
		result = NULL;
	}
	else { h->pos += str_len; }

	return result;
}
#undef MAX_UINT_STR_LEN

// Helper macros for the text parsers below. Both expect a BufferHandle
// pointer `h` and a gboolean `error` in the calling scope and have to be
// used inside a loop: on failure they set error to TRUE and `break`.

// Read a string with a `len` digit length prefix into `var`
#define __get_str(len, var) \
	var = get_pascal_parsed_string(h, len, FALSE); \
	if (var == NULL) { \
		error = TRUE; \
		break; \
	}

// Skip a string with a `len` digit length prefix
#define __skip_str(len) \
	if (get_pascal_parsed_string(h, len, TRUE) == NULL) \
	{ \
		error = TRUE; \
		break; \
	}

// Parse a text answer that announces `items` list entries. The entries
// start at offset 7; each one consists of a 6 digit db id, the artist,
// album and title strings (3 digit length prefix), the literal "10"
// followed by a 10 character modification stamp and the author string.
// A 6 character lookup duration closes the answer.
// If only the last announced entry is missing, res->list.pages is
// incremented instead of treating it as an error.
// On success the new entries are appended to res->list.lyrics and the
// result type becomes RES_LIST; on failure nothing is appended, the
// type becomes RES_UNDEF and FALSE is returned.
static gboolean parse_text_query
	(LyrixQueryResult* res, BufferHandle *h, guint items)
{
	guint parsed_items = 0;
	Lyrics *lyrics = NULL;
	gboolean error = FALSE;
	GList *new_items = NULL;

	h->pos = 7;

	while (parsed_items < items) {
		lyrics = lyrics_new();
		if (parse_unsigned_int(h, 6, &lyrics->db_id) == FALSE)
		{
			if ((parsed_items + 1) != items)
				{ error = TRUE; }
			else { res->list.pages++; }
			break;
		}

		__get_str(3, lyrics->artist)
		__get_str(3, lyrics->album)
		__get_str(3, lyrics->title)

		// "10" + 10 characters have to be inside the buffer
		if (((h->len - h->pos) < 12) ||
			(strncmp(h->buf + h->pos, "10", 2) != 0))
		{
			error = TRUE;
			break;
		}
		h->pos += 2;

		lyrics->modified = g_strndup(h->buf + h->pos, 10);
		h->pos += 10;

		__get_str(3, lyrics->author)

		// The list owns the entry from now on
		new_items = g_list_append(new_items, lyrics);
		lyrics = NULL;
		parsed_items++;
	}

	// Only an entry that didn't make it into the list is still set
	// here. Releasing it exactly once avoids freeing a list member a
	// second time when a later step fails.
	lyrics_free(lyrics);
	lyrics = NULL;

	if ((error == FALSE) &&
		(parse_double(h, 6, &res->lookup_dur) == FALSE))
	{
		error = TRUE;
	}

	if (error == TRUE) {
		res->result_type = RES_UNDEF;
		res->list.pages = 0;
		g_list_foreach(new_items, free_glist_lyrics, NULL);
		g_list_free(new_items);
	}
	else {
		if (parsed_items == items)
			res->list.pages = 0;
		res->list.lyrics = g_list_concat
			(res->list.lyrics, new_items);
		res->list.entries += parsed_items;
		res->result_type = RES_LIST;
	}

	return (error == FALSE);
}

// Parse a text answer with exactly one lyrics entry: a 6 digit db id at
// offset 7, the artist, album and title strings (3 digit length prefix),
// the text (5 digit length prefix) and a 6 character lookup duration.
// If an entry with that id is already in res->list.lyrics only its text
// is replaced, otherwise a new entry is appended.
// On success res->lyrics.lyric_data references the entry's data (shallow
// copy) and the result type becomes RES_LYRICS. On failure FALSE is
// returned; the type is set to RES_UNDEF unless already the id is broken.
static gboolean parse_text_lyrics
	(LyrixQueryResult* res, BufferHandle *h)
{
	Lyrics fake_lyr, *lyrics, *map_lyr;
	gboolean error = FALSE;
	GList *item;
	gchar *tmp_str;

	h->pos = 7;

	map_lyr = &(res->lyrics.lyric_data);
	lyrics_init(map_lyr);

	// Only db_id of fake_lyr is used (by lyric_db_cmp)
	if (parse_unsigned_int(h, 6, &fake_lyr.db_id) == FALSE)
		{ return FALSE; }

	item = g_list_find_custom(res->list.lyrics, &fake_lyr, lyric_db_cmp);
	if (item != NULL) {

		lyrics = (Lyrics*) item->data;

		// We need the loop, otherwise break doesn't work
		do {
			// Artist, album and title are already known
			__skip_str(3)
			__skip_str(3)
			__skip_str(3)

			__get_str(5, tmp_str)
			// Don't leak a text cached by an earlier answer
			if (lyrics->text != NULL)
				{ g_strfreev((gchar**) lyrics->text); }
			lyrics->text =
				g_strsplit(tmp_str, "\n", 0);
			g_free(tmp_str);
		}
		while (0);
	}
	else {
		lyrics = lyrics_new();
		// Keep the id, so the entry can be found in the list later
		lyrics->db_id = fake_lyr.db_id;

		// We need the loop, otherwise break doesn't work
		do {
			__get_str(3, lyrics->artist)
			__get_str(3, lyrics->album)
			__get_str(3, lyrics->title)

			__get_str(5, tmp_str)
			lyrics->text =
				g_strsplit(tmp_str, "\n", 0);
			g_free(tmp_str);
		}
		while (0);
	}

	// An error from the strings above must not be overwritten by a
	// duration that happens to parse
	if (error == FALSE)
		{ error = !parse_double(h, 6, &res->lookup_dur); }

	if (error == TRUE) {
		res->result_type = RES_UNDEF;
		// A new entry never reached the list, so it is released here
		if (item == NULL)
			{ lyrics_free(lyrics); }
	}
	else {
		res->result_type = RES_LYRICS;
		lyrics_refcopy(lyrics, map_lyr);
		if (item == NULL) {
			res->list.lyrics = g_list_append
				(res->list.lyrics, lyrics);
			res->list.entries++;
		}
	}

	res->list.pages = 0;
	return (error == FALSE);
}
#undef __skip_str
#undef __get_str

// Entry point for text answers. The buffer has to start with "LYRIX"
// and to hold at least 10 bytes; file input (PT_FIL) is not supported.
//  "LYRIXLO" + 3 digit length + uri : redirect, stored in res->forward
//  "LYRIX" + 2 digit item count     : one item is parsed as lyrics,
//                                     several items as a result list
// Returns TRUE on success. On any failure the result type is set to
// RES_UNDEF and FALSE is returned.
static inline gboolean parse_txt_blob_data
	(LyrixQueryResult* res, BufferHandle *h, ParseType type)
{
	gboolean ok = FALSE;
	gchar digits[4];
	gint count;

	if (!(type & PT_FIL) && !(h->len < 10) &&
		(strncmp(h->buf, "LYRIX", 5) == 0))
	{
		// Do we have a redirect?
		if (strncmp(h->buf + 5, "LO", 2) == 0) {
			memcpy(digits, h->buf + 7, 3);
			digits[3] = '\0';

			count = atoi(digits);

			// The uri has to be inside the buffer
			if ((count >= 10) && (count <= (h->len - 10))) {
				res->result_type = RES_FORWARD;
				g_free(res->forward.uri);
				res->forward.uri = g_strndup(h->buf + 10, count);
				return TRUE;
			}
		}
		else {
			digits[0] = h->buf[5];
			digits[1] = h->buf[6];
			digits[2] = '\0';

			count = atoi(digits);

			if (count == 1)
				{ ok = parse_text_lyrics(res, h); }
			else if (count > 1)
				{ ok = parse_text_query(res, h, count); }
		}
	}

	if (ok == FALSE)
		{ res->result_type = RES_UNDEF; }

	return ok;
}

// Parse a server answer into res.
//  buf     : the answer (a file name for file based XML parsing)
//  buf_len : number of bytes in buf, or a negative value if buf is a
//            '\0' terminated string whose length should be determined
//  type    : flags that select the parser (PT_TXT, or PT_XML when built
//            with libxml2) and the kind of input
// Trailing '\0', CR, LF and blanks are not counted as part of the data.
// Returns TRUE if the answer could be parsed, FALSE otherwise (also for
// a NULL res or buf, PT_UDF or an unsupported type).
gboolean lyrix_query_result_parse
	(LyrixQueryResult* res, const gchar *buf, gint buf_len, ParseType type)
{
	BufferHandle handle;

	g_return_val_if_fail(res != NULL, FALSE);
	// buf is passed to strlen and indexed below
	g_return_val_if_fail(buf != NULL, FALSE);
	g_return_val_if_fail(type != PT_UDF, FALSE);

	/// Fill the buffer handle
	handle.buf = buf;
	if (buf_len < 0) { handle.len = (gint) strlen(buf); }
	else { handle.len = buf_len; }
	handle.pos = 0;

	// PHP terminates a string with a break
	while ((handle.len > 0) &&
		((handle.buf[handle.len - 1] == '\0') ||
		(handle.buf[handle.len - 1] == 10) ||
		(handle.buf[handle.len - 1] == 13) ||
		(handle.buf[handle.len - 1] == ' ')))
	{
		handle.len--;
	}

	if (type & PT_TXT)
		{ return parse_txt_blob_data(res, &handle, type); }
#ifdef HAVE_LIBXML2
	else if (type & PT_XML)
		{ return parse_xml_data(res, &handle, type); }
#endif

	return FALSE;
}

// Look up the entry with the given db_id in the result list and return
// its text as one newly allocated string (lines joined by "\n"), which
// the caller has to g_free. Returns NULL if res is NULL, the id is
// unknown or no text is stored for the entry.
gchar* lyrix_query_result_get_cached_lyrics
	(LyrixQueryResult* res, gint db_id)
{
	GList *item;
	Lyrics fake_lyr, *lyr;

	g_return_val_if_fail(res != NULL, NULL);

	// Only db_id is read by lyric_db_cmp
	fake_lyr.db_id = db_id;
	item = g_list_find_custom(res->list.lyrics, &fake_lyr, lyric_db_cmp);

	if (item != NULL) {
		lyr = (Lyrics*) item->data;
		if (lyr->text != NULL)
			return g_strjoinv("\n", (gchar**) lyr->text);
	}

	return NULL;
}
