/* Search/replace functions (with and without regular expressions).

	Copyright (C) 1993-1998 Sebastiano Vigna 
	Copyright (C) 1999-2001 Todd M. Lewis and Sebastiano Vigna

	This file is part of ne, the nice editor.

	This program is free software; you can redistribute it and/or modify it
	under the terms of the GNU General Public License as published by the
	Free Software Foundation; either version 2, or (at your option) any
	later version.
	
	This program is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	General Public License for more details.
	
	You should have received a copy of the GNU General Public License along
	with this program; see the file COPYING.  If not, write to the Free
	Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
	02111-1307, USA.  */


#include "ne.h"
#include "regex.h"


/* This is the initial allocation size for regex.library. */

#define START_BUFFER_SIZE 4096


/* This array is used both by the Boyer-Moore algorithm and by the regex
library. It is updated if b->find_string_changed is TRUE (it should always
be the first time the string is searched for). */

static unsigned char d[256];

/* This macro upper cases a character or not, depending on the boolean
sense_case. It is used in find(). Note that the char returned in unsigned,
but the argument needs not to be. */

#define CONV(c)	(unsigned char)(sense_case ? c : up_case[(unsigned char)c])


/* This function performs a search for the given pattern with a simplified
Boyer-Moore algorithm starting at the given position, in the given
direction, skipping a possible match at the current cursor position if
skip_first is TRUE. If dir is 0, it is fetched from b->opt.search_back. If
pattern is NULL, it is fetched from b->find_string. In this case,
b->find_string_changed is checked, and, if it is FALSE, the string is not
recompiled. Please check to set b->find_string_changed whenever a new string
is set in b->find_string. The cursor is moved on the occurrence position if
a match is found. */

int find(buffer *b, const char *pattern, int dir, int skip_first) {

	int sense_case = (b->opt.case_search != 0);
	unsigned char c, first_char;
	char *p;
	long i, m;
	int recompile_string;
	long y;
	line_desc *ld;

	if (!pattern) {
		pattern = b->find_string;
		recompile_string = b->find_string_changed || b->last_was_regexp;
	}
	else recompile_string = TRUE;

	if (!dir) dir = b->opt.search_back ? -1 : 1;

	if (!pattern || !(m = strlen(pattern))) return(ERROR);

	if (recompile_string) memset(d, m, sizeof(d));

	ld = b->cur_line_desc;
	y = b->cur_line;

	if (dir>0) {

		if (recompile_string) {
			for(i=0; i<m-1; i++) d[CONV(pattern[i])] = m-i-1;
			b->find_string_changed = 0;
		}

		p = ld->line + b->cur_pos + m-1 + (skip_first ? 1 : 0);
		first_char = CONV(pattern[m-1]);

		while(y<b->line_num) {

			assert(ld->ld_node.next != NULL);

			if (ld->line_len >= m) {

				while((p - ld->line) < ld->line_len) {

					if ((c = CONV(*p)) != first_char) p+=d[c];
					else {
						for (i=1; i<m; i++)
							if (CONV(*(p-i)) != CONV(pattern[m-i-1])) {
								p+=d[c];
								break;
							}
						if (i == m) {
							goto_line(b, y);
							goto_pos(b, (p - ld->line)-m+1);
							return(OK);
						}
					}
				}
			}

			ld = (line_desc *)ld->ld_node.next;
			if (ld->ld_node.next) p = ld->line+m-1;
			y++;
		}
	}
	else {

		if (recompile_string) {
			for(i=m-1; i>0; i--) d[(unsigned char)CONV(pattern[i])] = i;
			b->find_string_changed = 0;
		}

		p = ld->line + (b->cur_pos > ld->line_len-m ? ld->line_len-m : b->cur_pos + (skip_first ? -1 : 0));
		first_char = CONV(pattern[0]);

		while(y>=0) {

			assert(ld->ld_node.prev != NULL);

			if (ld->line_len >= m) {

				while((p - ld->line) >= 0) {

					if ((c = CONV(*p)) != first_char) p-=d[c];
					else {
						for (i=1; i<m; i++)
							if (CONV(*(p+i)) != CONV(pattern[i])) {
								p-=d[c];
								break;
							}
						if (i == m) {
							goto_line(b, y);
							goto_pos(b, p - ld->line);
							return(OK);
						}
					}
				}
			}

			ld = (line_desc *)ld->ld_node.prev;
			if (ld->ld_node.prev) p = ld->line+ld->line_len-m;
			y--;
		}
	}

	return(NOT_FOUND);
}



/* This function replaces n characters with the given string at the current
cursor position, and then moves it to the end of the string. */

int replace(buffer *b, int n, const char *string) {

	int len;

	if (!string) string = b->replace_string;

	if (!string) return(ERROR);

	len = strlen(string);

	start_undo_chain(b);

	delete_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos, n);

	if (len) insert_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos, string, len+1);

	end_undo_chain(b);

	goto_pos(b, b->cur_pos+len);

	return(OK);
}



/* The following variables are used by regex. In particular, re_reg holds
the stard/end of the extended replacement registers. */

static struct re_pattern_buffer re_pb;
static struct re_registers re_reg;


/* This function works exactly like find(), but uses regex.library instead. */

int find_regexp(buffer *b, const char *regex, int dir, int skip_first) {

	const char *p;
	int y, recompile_string;
	long i, start_pos;
	line_desc *ld;

	if (!regex) {
		regex = b->find_string;
		recompile_string = b->find_string_changed || !b->last_was_regexp;
	}
	else recompile_string = TRUE;

	if (!dir) dir = b->opt.search_back ? -1 : 1;

	if (!regex || !strlen(regex)) return(ERROR);

	if (re_pb.buffer == NULL) {
		if (re_pb.buffer = malloc(START_BUFFER_SIZE)) re_pb.allocated = START_BUFFER_SIZE;
		else return(OUT_OF_MEMORY);
	}

	re_pb.fastmap = (void *)d;

	/* We have to be careful: even if the search string has not changed, it
	is possible that case sensitivity has. In this case, we force recompilation. */

	if (b->opt.case_search) {
		if (re_pb.translate != 0) recompile_string = TRUE;
		re_pb.translate = 0;
	}
	else {
		if (re_pb.translate != (char *)up_case) recompile_string = TRUE;
		re_pb.translate = (char *)up_case;
	}

	/* Here we have a very dirty hack: since we cannot return the error of regex,
	we print it here. Which means that we access term.c's functions. Ack! */

	if (recompile_string && (p = re_compile_pattern(regex, strlen(regex), &re_pb))) {
		print_message(p);
		ring_bell();
		return(ERROR);
	}

	b->find_string_changed = 0;

	ld = b->cur_line_desc;
	y = b->cur_line;

	if (dir>0) {

		start_pos = b->cur_pos+(skip_first ? 1 : 0);

		while(y<b->line_num) {

			assert(ld->ld_node.next != NULL);

			if (start_pos <= ld->line_len &&
				(i = re_search(&re_pb, ld->line ? ld->line : "", ld->line_len, start_pos, ld->line_len-start_pos, &re_reg))>=0) {
				goto_line(b, y);
				goto_pos(b, i);
				return(0);
			}

			ld = (line_desc *)ld->ld_node.next;
			start_pos = 0;
			y++;
		}
	}
	else {

		start_pos = b->cur_pos+(skip_first ? -1 : 0);

		while(y>=0) {

			assert(ld->ld_node.prev != NULL);

			if (start_pos >= 0 &&
				(i =re_search(&re_pb, ld->line ? ld->line : "", ld->line_len, start_pos, -start_pos-1, &re_reg))>=0) {
				goto_line(b, y);
				goto_pos(b, i);
				return(OK);
			}

			ld = (line_desc *)ld->ld_node.prev;
			if (ld->ld_node.prev) start_pos = ld->line_len;
			y--;
		}
	}

	return(NOT_FOUND);
}



/* This function replaces a regular expression. The given string can contain
\0, \1 etc. for the pattern matched by the i-th pair of brackets (\0 is the
whole string). */

int replace_regexp(buffer *b, const char *string) {

	int i, pos, len, c, reg_used = FALSE;
	unsigned char *p, *q, *t = NULL;

	if (!string) string = b->replace_string;

	if (!string) return(ERROR);

	if (q = p = str_dup(string)) {

		len = strlen(p);

		while(TRUE) {
			while(*q && *q != '\\') q++;

			if (!*q) break;

			i = *(q+1)-'0';

			if (*(q+1) == '\\') {
				memmove(q, q+1, strlen(q+1)+1);
				q++;
				len--;
			}
			else if (i>=0 && i<RE_NREGS && re_reg.start[i]>=0) {
				*q++ = 0;
				*q++ = i;
				reg_used = TRUE;
			}
			else {
				free(p);
				return(WRONG_CHAR_AFTER_BACKSLASH);
			}
		}

		if (reg_used) {
			if (t = malloc(re_reg.end[0]-re_reg.start[0]+1)) {
				memcpy(t, b->cur_line_desc->line+re_reg.start[0], re_reg.end[0]-re_reg.start[0]);
				t[re_reg.end[0]-re_reg.start[0]] = 0;
			}
			else {
				free(p);
				return(OUT_OF_MEMORY);
			}
		}

		for(i=RE_NREGS-1; i>=0; i--) {
			re_reg.end[i] -= re_reg.start[0];
			re_reg.start[i] -= re_reg.start[0];
		}

		start_undo_chain(b);

		delete_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos, re_reg.end[0]);

		q = p;
		pos = 0;

		while(TRUE) {
			if (strlen(q)) {
				insert_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos+pos, q, strlen(q)+1);
				pos += strlen(q);
			}

			q += strlen(q)+1;

			if (q-p > len) break;

			if (re_reg.end[*q]-re_reg.start[*q]) {

				c = t[re_reg.end[*q]];
				t[re_reg.end[*q]] = 0;

				insert_stream(b, b->cur_line_desc, b->cur_line, b->cur_pos+pos, t+re_reg.start[*q], re_reg.end[*q]-re_reg.start[*q]+1);

				t[re_reg.end[*q]] = c;

				pos += re_reg.end[*q]-re_reg.start[*q];
			}

			q++;
		}

		end_undo_chain(b);

		goto_pos(b, b->cur_pos+pos);
		if (pos == 0) char_right(b);

		free(t);
		free(p);
	}
	else return(OUT_OF_MEMORY);

	return(OK);
}
