/*
 * Jeffrey Friedl
 * Omron Corporation
 * Nagaokakyoshi, Japan 617
 *
 * jfriedl@nff.ncl.omron.co.jp
 *
 * This work is placed under the terms of the GNU General Purpose License
 * (the "GNU Copyleft").
 *
 * October 1993
 *
 * Search an indexed file via a regular expression pattern.
 *
 * We'll compile the regex and look at the characters that all matching lines
 * will have. Looking at the index we'll only bother trying the regex on
 * lines that have all characters required. Of course, if we come up with
 * no required characters, we'll have to check every line.
 */
#include <ctype.h>
#include "lib/config.h"
#include "lib/output.h"
#include "lib/assert.h"
#include "lib/index.h"
#include "lib/loadfile.h"
#include "lib/jregex.h"
#include "lib/replace.h"
#include "lookup.h"

/*
TODO:
Optimize (code size/time) for when APPLY_REGEX_MAX_CHAR_COUNT is 1.
*/

/*
 * Write one matched line for SLOT to the output stream.
 *
 *   slot   - the (non-combo) slot the line came from; its current_flag
 *            settings select the modify / tag / highlight handling.
 *   line   - the text of the line; only LENGTH bytes are used.
 *   length - number of bytes in LINE.
 *
 * Steps, in order:
 *   1. If the modify flag is on, run the slot's substitution over the
 *      line and, when that succeeds, print the substituted text instead.
 *   2. If the tag flag is on, print the slot's tag string first.
 *   3. (HAVE_SPINNER) If the line begins with a tab and a spinner interval
 *      is set, print a leading space, keeping that space out of the log
 *      file when LOG_FILE_SUPPORT is compiled in.
 *   4. Print the text.  Unless built with SERVER_CONFIG, the part matched
 *      by the first search pattern is wrapped in highlight codes when the
 *      highlight flag is on and the match is non-empty.
 *   5. Print a newline.
 */
void output_line(const struct slot_info *slot, String *line, unsigned length)
{
    kibishii_assert(!COMBO(slot));

    if (slot->current_flag.modify)
    {
	/* buffer produced by the previous call's substitution, if any */
	static string *new = 0;
	int retval;

	/*
	 * Release the previous buffer and clear the pointer, so that
	 * apply_substitution is never handed the address of a dangling
	 * pointer.
	 */
	if (new)
	{
	    free(new);
	    new = 0;
	}

	/* last argument: presumably a cap on the number of replacements */
	retval = apply_substitution(&slot->modify_spec.regex, &new, 0, line,
				    length, slot->modify_spec.replacement,
				    slot->modify_spec.global ? 1000 : 1);
	if (retval != APP_SUB_SUCCESS)
	    new = 0;	/* fall through and print the line unmodified */
	else
	    length = str_len(line = new);
    }

    if (slot->current_flag.tag)
    {
	kibishii_assert(slot->tag_string);
	output((const char *)slot->tag_string);
    }

 #ifdef HAVE_SPINNER
    /* output an initial space if the first char is a tab, so that it
       will be sure to erase any spinner.  An empty line has no first
       char, so don't look at line[0] in that case. */
    if (length != 0 && line[0] == '\t' && lookup.spinner.interval)
    {
      #ifndef LOG_FILE_SUPPORT
	outchar(' ');
      #else
	/*
	 * But we don't want to output the space to the log file,
	 * (if there is one), so we have to be careful about that.
	 */
	int log_fd = set_extra_output_file(JUST_CHECKING_OUTPUT_FILE);
	if (current_log_file == 0)
	    outchar(' ');
	else {
	    flush_output();
	    set_extra_output_file(NO_OUTPUT_FILE); /* turn logging off */
	    outchar(' ');
	    flush_output();
	    set_extra_output_file(log_fd);         /* and back on */
	}
      #endif /* LOG_FILE_SUPPORT */
    }
 #endif /* HAVE_SPINNER */

 #ifdef SERVER_CONFIG
    /* no highlighting in this configuration: just copy the bytes out */
    while (length--)
	outchar(*line++);
 #else

    /*
     * Print plainly when highlighting is off, when the first search
     * pattern does not match this (possibly modified) text, or when the
     * match is empty.
     */
    if (!slot->current_flag.highlight ||
	 !regexec(&lookup.search[0].regex, line, length) ||
	regexec_match_start == regexec_match_end)
    {
	while (length--)
	    outchar(*line++);
    } else {
	String *lineend = line + length;
	static DECL_STYLES;

	/* text before the match */
	while (line < regexec_match_start)
	    outchar(*line++);

	/* highlight-on sequence, emitted in pager-transparent mode */
	(void)output_pager_transparent(1);

	output((lookup.flag.hl_style == HL_STYLE_HTML)
	           ? (const char *)lookup.slot->highlight_tag
	           : styles[lookup.flag.hl_style]);

	(void)output_pager_transparent(0);

	/* the match itself (this walks the global match-start pointer) */
	while (regexec_match_start < regexec_match_end)
	    outchar(*regexec_match_start++);

	/* highlight-off sequence */
	(void)output_pager_transparent(1);
	if (lookup.flag.hl_style != HL_STYLE_HTML)
	    output("\033[0m");
	else
	    /* closing tag: "</" followed by the tag minus its first char */
	    outputf("</%s", lookup.slot->highlight_tag + 1);

	(void)output_pager_transparent(0);

	/* text after the match (this walks the global match-end pointer) */
	while (regexec_match_end < lineend)
	    outchar(*regexec_match_end++);
    }
 #endif /* SERVER_CONFIG */
    outchar('\n');
}

/*
 * Remember LINE (and the slot it belongs to) in lookup.list[].
 * When the list is already full the line is not stored; only the
 * overflow counter is bumped.
 */
static __inline__ void
save_to_list(fileloc line, const struct slot_info *slot)
{
    if (lookup.list.used >= lookup.list.size)
    {
	/* no room left: just note that something was dropped */
	lookup.list.overflow++;
	return;
    }

    lookup.list.array[lookup.list.used].slot = slot;
    lookup.list.array[lookup.list.used].line = line;
    lookup.list.used++;
}

/*
 * Status values returned by matchedline() and apply_regex_to_file():
 * CONTINUE_SEARCH to keep going, ABORT_SEARCH to stop.
 */
#define CONTINUE_SEARCH   0
#define ABORT_SEARCH      1

/*
 * Bookkeeping for a line that all of the search patterns have accepted.
 *
 * The line is counted as matched.  It may then be diverted to
 * lookup.list[] rather than shown: first by the slot's filter, then by
 * the whole-word requirement.  A line that survives both is counted as
 * printed (and displayed when the slot's display flag is on), unless it
 * is the one that uses up the print limit.
 *
 * Returns ABORT_SEARCH when the print limit has just been reached,
 * CONTINUE_SEARCH in every other case.
 */
static int
matchedline(const struct slot_info *slot,
	    String *line,
	    fileloc loc,
	    unsigned length)
{
    lookup.count.matched++;

    if (slot->current_flag.filter)
    {
	/* the line is filtered when the filter regex's verdict differs
	   from the filter's "negative" setting */
	if (regexec(&slot->filter_spec.regex, line, length) !=
	    slot->filter_spec.negative)
	{
	    lookup.count.filtered++;

	    /* the list holds filtered lines only while no non-word
	       lines have been seen */
	    if (lookup.count.nonword == 0)
	    {
	      #ifdef HAVE_SPINNER
		if (lookup.count.filtered == 1)
		{
		    lookup.spinner.chars = (String *)"";
		    lookup.spinner.char_count = 5;
		}
	      #endif /* HAVE_SPINNER */
		save_to_list(loc, slot);
	    }
	    return CONTINUE_SEARCH;
	}
    }

    /* whole-word mode: set aside matches not on word boundaries */
    if (slot->current_flag.word &&
	!(regexec_match_at_start_of_word && regexec_match_at_end_of_word))
    {
	int first_nonword = (lookup.count.nonword == 0);

	lookup.count.nonword++;
	if (first_nonword)
	{
	    /* the list may have been holding filtered lines; from now
	       on it holds non-word lines, so start it over */
	    lookup.list.used = 0;
	    lookup.list.overflow = 0;
	  #ifdef HAVE_SPINNER
	    lookup.spinner.chars = (String *)"";
	    lookup.spinner.char_count = 5;
	  #endif /* HAVE_SPINNER */
	}
	save_to_list(loc, slot);
	return CONTINUE_SEARCH;
    }

    /* a zero counter means no limit; otherwise count down and stop
       when the counter reaches zero */
    if (lookup.lines_to_print_this_time != 0)
    {
	lookup.lines_to_print_this_time--;
	if (lookup.lines_to_print_this_time == 0)
	{
	    if (slot->current_flag.display)
		output("search aborted: matched-line count over limit\n");
	    return ABORT_SEARCH;
	}
    }

    lookup.count.printed++;
    if (slot->current_flag.display)
	output_line(slot, line, length);
    return CONTINUE_SEARCH;
}


/*
 * local_label(name, tag) pastes NAME and TAG together into a single
 * identifier.  Going through _local_label lets TAG be macro-expanded
 * before the paste, so local_label(fail, __LINE__) produces a name that
 * ends in the current line number.
 */
#define _local_label(name, tag)  name ## tag
#define local_label(name,tag) _local_label(name, tag)

/*
 * LOG_FILE_STUFF(stuff) expands to STUFF when LOG_FILE_SUPPORT is defined
 * and to nothing otherwise.
 */
#ifdef LOG_FILE_SUPPORT
# define LOG_FILE_STUFF(stuff) stuff
#else
# define LOG_FILE_STUFF(stuff) /*nothing*/
#endif

/*
 * SPINNER_STUFF: one tick of the progress spinner.  Expands to nothing
 * unless HAVE_SPINNER is defined.
 *
 * The expansion uses the variables `till_next_spinner' and
 * `spinner_position', which must be in scope where it is expanded.
 *
 * When lookup.spinner.interval is nonzero and the countdown has run out:
 * step to the next frame (wrapping at lookup.spinner.char_count), output
 * the two bytes of that frame from lookup.spinner.chars followed by a
 * carriage return, flush, and restart the countdown from
 * lookup.spinner.interval.  With LOG_FILE_SUPPORT, if current_log_file is
 * nonzero, the extra output file is switched off around that output and
 * restored afterwards.
 */
#ifndef HAVE_SPINNER
# define SPINNER_STUFF  /* nothing */
#else
# define SPINNER_STUFF                                                       \
    if (lookup.spinner.interval && --till_next_spinner < 0)                  \
    {                                                                        \
      LOG_FILE_STUFF(                                                        \
	int log_fd = 0;  /* initialize only to shut up warnings */           \
	if (current_log_file != 0) {                                         \
	    log_fd = set_extra_output_file(JUST_CHECKING_OUTPUT_FILE);       \
	    flush_output();                                                  \
	    set_extra_output_file(NO_OUTPUT_FILE); /* turn logging off */    \
	})                                                                   \
                                                                             \
	if (++spinner_position >= lookup.spinner.char_count)                 \
	    spinner_position = 0;                                            \
	outchar(lookup.spinner.chars[spinner_position*2]);                   \
	outchar(lookup.spinner.chars[spinner_position*2+1]);                 \
	outchar('\r');                                                       \
	flush_output();                                                      \
      LOG_FILE_STUFF(                                                        \
	if (current_log_file != 0)                                           \
	    set_extra_output_file(log_fd);                                   \
      )                                                                      \
	till_next_spinner = lookup.spinner.interval;                         \
    }
#endif

/*
 * do_actual_check(V, POS, LENDEST): run all the search patterns over the
 * line at position POS of virtual file V.
 *
 * This must be expanded inside a function returning int that has `slot'
 * (and whatever SPINNER_STUFF needs) in scope: the expansion contains
 * `return' statements and a goto label.
 *
 *   - The length of the line is stored into LENDEST.
 *   - lookup.count.checked is incremented and the spinner is ticked.
 *   - The patterns are tried in order.  A pattern is satisfied when it
 *     matches and its `not' flag is clear, or fails to match and its
 *     `not' flag is set.  The first unsatisfied pattern ends the check of
 *     this line.
 *   - Whenever apply_regex_abort is found set, the enclosing function
 *     returns ABORT_SEARCH.
 *   - If every pattern is satisfied, matchedline() is called.  If it
 *     returns ABORT_SEARCH, the enclosing function returns -- and the
 *     value it returns in that case is CONTINUE_SEARCH.
 *
 * The goto label is made distinct for each expansion by pasting __LINE__
 * into its name, so two expansions must not sit on the same source line.
 */
#define do_actual_check(V, POS, LENDEST)                                     \
{                                                                            \
     unsigned _length_;                                                      \
     VirtFile *v = (V);                                                      \
     fileloc pos = (POS);                                                    \
     String *_text_ = VirtPos2Str(v, pos, &_length_);                        \
     int _i_;                                                                \
                                                                             \
     (LENDEST) = _length_;                                                   \
                                                                             \
     lookup.count.checked++;                                                 \
     SPINNER_STUFF;                                                          \
                                                                             \
     for (_i_ = 0; _i_ < lookup.patterns; _i_++)                             \
	 if (!regexec(&lookup.search[_i_].regex, _text_, _length_) !=        \
	     lookup.search[_i_].not)                                         \
	 {                                                                   \
	     if (apply_regex_abort)                                          \
		 return ABORT_SEARCH;                                        \
	     else                                                            \
		 goto local_label(fail, __LINE__);                           \
	 }                                                                   \
                                                                             \
    /* if we get here, the regex(es) matched */                              \
     if (apply_regex_abort)                                                  \
	 return ABORT_SEARCH;                                                \
     if (matchedline(slot, _text_, pos, _length_) == ABORT_SEARCH)           \
	 return CONTINUE_SEARCH;                                             \
     local_label(fail, __LINE__) :                                           \
    (void)1; /* this here because ANSI requires something after the label */ \
}

/*
 * We'll consider up to this many different required characters.
 * (This sizes the per-character arrays in apply_regex_to_file; it can be
 * overridden by defining it before this point.)
 */
#ifndef APPLY_REGEX_MAX_CHAR_COUNT
#define APPLY_REGEX_MAX_CHAR_COUNT 10
#endif

/*
 * Can be set externally to cause the search to abort.
 * It is polled while searching, and cleared by apply_regex() at the
 * start of each search.
 */
volatile unsigned apply_regex_abort = 0;

/*
 * Search the single (non-combo) file behind SLOT for lines that satisfy
 * all of lookup.search[0 .. lookup.patterns-1].
 *
 * Outline:
 *   1. Compile every pattern, with flags derived from the slot's and
 *      lookup's flag settings.
 *   2. Collect up to APPLY_REGEX_MAX_CHAR_COUNT distinct characters that
 *      regmusthave() reports for the patterns, and for each one fetch from
 *      the file's index the count and the packed list of lines having it.
 *   3. If no characters were collected, run the patterns over every line
 *      of the file.  Otherwise step through the per-character lists
 *      together and run the patterns only on lines present in all lists.
 *
 * Returns ABORT_SEARCH if a pattern fails to compile or apply_regex_abort
 * is seen set; CONTINUE_SEARCH otherwise (that includes the case where
 * matchedline() asked to stop -- see do_actual_check).
 */
static int apply_regex_to_file(const struct slot_info *slot)
{
    /* for char #n, charcount[n] is the number of lines with that char
       that have not yet been read from its packed list */
    unsigned charcount[APPLY_REGEX_MAX_CHAR_COUNT];
 #ifdef HAVE_SPINNER
    /* state used by SPINNER_STUFF (expanded via do_actual_check) */
    unsigned spinner_position = 0;
    int till_next_spinner = 0;
 #endif /* HAVE_SPINNER */

    /* pointer into index's packed list for char #n */
    String *packed_list_ptr[APPLY_REGEX_MAX_CHAR_COUNT];
    /* the same, for the case where IsMemIndex() is true of the index */
    static MemItem mem_packed_list_ptr[APPLY_REGEX_MAX_CHAR_COUNT];

    /* pointer to char #n's current line in the text */
    fileloc current_line[APPLY_REGEX_MAX_CHAR_COUNT];
    int used = 0; /* how many of each array are used */

    int ret;
    int i;

    /* the (case-folded, high-bit-stripped) bytes of char #n */
    unsigned char HI[APPLY_REGEX_MAX_CHAR_COUNT];
    unsigned char LO[APPLY_REGEX_MAX_CHAR_COUNT];
    unsigned int pat_index = 0;       /* next pattern to ask regmusthave about */
    const unsigned char *list = 0;    /* unread part of current musthave list */

    #define KID_HIRA_HI 0244    /* high byte for hiragana EUC */
    #define KID_KATA_HI 0245    /* high byte for katakana EUC */

    unsigned first_pattern_flags, nonfirst_pattern_flags;
    unsigned generic_flags = 0;
    int MemIndex = IsMemIndex(slot->file->index);

    kibishii_assert(lookup.patterns != 0);
    kibishii_assert(!COMBO(slot));
    kibishii_assert(!slot->current_flag.filter || slot->filter_spec.pattern);
    kibishii_assert(!slot->current_flag.modify || slot->modify_spec.pattern);

    /* translate the flag settings into regcomp flags */
    if (slot->current_flag.fuzz)
	generic_flags |= REGCOMP_FUZZY_KANA_REPETITION;

    if (slot->current_flag.fold)
	generic_flags |= REGCOMP_IGNORE_ALPHA_CASE | REGCOMP_IGNORE_KANA_CASE;

    if (lookup.flag.regex_debug)
	generic_flags |= REGCOMP_DEBUG;

    /* only the first pattern is asked for word-match info */
    first_pattern_flags = REGCOMP_CALC_MUSTHAVE | generic_flags |
	(slot->current_flag.word ? REGCOMP_WANT_WORD_MATCH_INFO
	 : REGCOMP_JUST_MATCH);

    nonfirst_pattern_flags = generic_flags | REGCOMP_JUST_MATCH;

    /* make sure any previous regexes were freed */
    for (i = 0; i < MAX_PATS_ON_ONE_LINE; i++)
	regfree(&lookup.search[i].regex);

    for (i = 0; i < lookup.patterns; i++)
    {
	unsigned these_flags =
	    i==0 ? first_pattern_flags : nonfirst_pattern_flags;
	/* ask for a must-have list for every non-negated pattern */
	if (!lookup.search[i].not)
	    these_flags |= REGCOMP_CALC_MUSTHAVE;

	ret = regcomp(&lookup.search[i].regex, lookup.search[i].pattern,
		      these_flags);
	/* retry with paren info saved if regcomp says that is needed */
	if (ret == REGCOMP_NEED_SAVE_PAREN_INFO)
	    ret = regcomp(&lookup.search[i].regex, lookup.search[i].pattern,
			  (these_flags| REGCOMP_SAVE_MATCHED_PAREN_INFO));

	/* NOTE(review): this runs before ret is checked, so showregex is
	   also called for a pattern that failed to compile -- confirm
	   that is safe */
	if (lookup.flag.debug)
	{
	    outputf("pattern: %s>> %s <<\n", lookup.search[i].not ? "NOT ":"",
		    lookup.search[i].pattern);
	    showregex(&lookup.search[i].regex);
	}

	if (ret != REGCOMP_SUCCESS)
	{
	    if (lookup.flag.debug)
		outputf("[bad regcomp returns %d]\n", ret);
	    output((const char *)regcomp_error_report());
	    return ABORT_SEARCH;
	}
    }


    /*
     * Return the next line in the list for char #I, which will either
     * be NULL_POINTER (if the list is now exhausted) or the current line
     * bumped up by the amount according to the packed list.
     * Reading a value also uses up one of charcount[I].
     */
#define NULL_POINTER -1

#define ORIG_NEXTLINE(I)                                                     \
    (charcount[I] == 0 ? NULL_POINTER :                                      \
     (charcount[I]--,                                                        \
      current_line[I] + read_packed_value(&packed_list_ptr[I])))

#define MEM_NEXTLINE(I)                                                      \
    (charcount[I] == 0 ? NULL_POINTER :                                      \
     (charcount[I]--,                                                        \
      current_line[I] + mem_read_packed_value(&mem_packed_list_ptr[I])))

#define NEXTLINE(I) (MemIndex ? MEM_NEXTLINE(I) : ORIG_NEXTLINE(I))
    /*
     * Gather up to APPLY_REGEX_MAX_CHAR_COUNT distinct characters that
     * *must* be in any matching line, taking them in the order that
     * regmusthave() lists them, pattern by pattern.
     * NOTE(review): an earlier version of this comment said the "least
     * common" characters are chosen; nothing below orders by frequency.
     */

    while (used < APPLY_REGEX_MAX_CHAR_COUNT)
    {
	unsigned char hi, lo;
	elementcount count;
	int res;

	/* when the current list is missing or used up, move on to the
	   next pattern's list; stop when there are no more patterns */
	while (list == 0 || list[0] == 0)
	{
	    if (pat_index >= lookup.patterns)
	    {
		list = 0;
		break;
	    }
	    list = regmusthave(&lookup.search[pat_index++].regex);
	}
	if (list == 0)
	    break;

	/* since the index *always* folds case, make sure to do that here*/
	if (list[0] & 0x80) {
	    /* two-byte character: strip the high bits, and map the
	       katakana high byte to the hiragana one */
	    hi = (*list++) & 0x7f;
	    lo = (*list++) & 0x7f;
	    if (hi == (KID_KATA_HI & 0x7f))
		hi =  (KID_HIRA_HI & 0x7f);
	} else {
	    /* one-byte character: fold to lower case */
	    hi = 0;
	    lo = *list++;
	    if (isupper(lo))
		lo = tolower(lo);
	}

	/* if we've already seen this character, ignore */
	for (i = 0; i < used; i++)
	    if (LO[i] == lo && HI[i] == hi)
		break;
	if (i < used)
	{
	    /* outputf("already have %x %x\n", hi, lo); */
	    continue;
	}
	

	/* how many lines does the index list for this character? */
	if (MemIndex)
	    res = mem_get_index_count(slot->file->index, hi, lo, &count);
	else
	    res = get_index_count(slot->file->index, hi, lo, &count);

	if (!res || count == 0)
	{
	    /* a required two-byte or alphanumeric character that the
	       index has no lines for: nothing in this file can match */
	    if (hi != 0 || isalnum(lo))
	    {
		if (lookup.flag.debug)
		{
		    outputf("Character [%c%c] not in index; "
			    "apparently no lines in the file contain it.\n",
			    hi ? (hi|0x80) : hi, hi ? (lo|0x80) : lo);
		}

		return CONTINUE_SEARCH;
	    }

	    continue; /* just ignore if non-alphanumeric ASCII */
	}


	/* presumably marks a character whose line list was left out of
	   the index -- TODO confirm; such a character can't narrow the
	   search, so skip it */
	if (count == SKIPPED_COUNT)
	    continue;


	/* get the pointer to the packed_list for the character */
	if (MemIndex) {
	    IndexOffset io;
	    res = mem_get_index_list(slot->file->index, hi, lo, &io);
	    SetMem(&mem_packed_list_ptr[used], slot->file->index->FileP, io);
	} else {
	    res = get_index_list(slot->file->index, hi, lo,
				 &packed_list_ptr[used]);
	}

	/* a count was found above, so the list lookup is expected to work */
	if (!res)
	    kibishii_assert(0);

	HI[used] = hi;
	LO[used] = lo;
	charcount[used] = count;

	/*
	 * Prime CURRENT_LINE to point to the beginning of the
	 * file, then use NEXTLINE to get what's really the first
	 * line in the list for char #used.
	 */
	current_line[used] = 0;
	current_line[used] = NEXTLINE(used);
	used++;
    }


    if (used && lookup.flag.debug)
    {
	output("looking for lines with characters [");
	for (i = 0; i < used; i++)
	{
	    if (HI[i] == 0)
		outchar(LO[i]);
	    else
	    {
		/* put back the high bits stripped when folding */
		outchar(HI[i]|0x80);
		outchar(LO[i]|0x80);
	    }
	}
	output("]\n");
    }

    if (used == 0)
    {
	/* must search all lines... do that now */
	fileloc line = 0;
	fileloc end = slot->file->v->length;

	if (lookup.flag.debug)
	    output("[checking all lines]\n");

	while (line < end)
	{
	    unsigned len;
	    do_actual_check(slot->file->v, line, len);
	    /* step over the line and one more byte (presumably its
	       terminator) to reach the next line */
	    line += len + 1;
	}
    }
    else for (;;)
    {
	/*
	 * Now we have info on USED characters:
	 *    current_line[i] holds a pointer to the text file for char #i
	 *
	 * Each pass finds the furthest-along list position, moves every
	 * other list up to at least that position, and checks the line
	 * only if all lists ended up on the same one.
	 */
	int holdsmax = 0;
	fileloc max, startmax;

	/* which list is currently furthest into the file? */
	for (i = 1; i < used; i++) {
	    if (current_line[i] > current_line[holdsmax])
		holdsmax = i;
	}
        startmax = max = current_line[holdsmax];

	for (i = 0; i < used; i++)
	{
	    if (apply_regex_abort)
		return ABORT_SEARCH;
	    if (i == holdsmax)
	        continue;

	    /* advance list #i until it reaches or passes MAX; once any
	       list runs out there can be no more common lines */
	    while (current_line[i] < max)
	    {
		if (current_line[i] = NEXTLINE(i),
		    current_line[i] == NULL_POINTER)
		{
		    return CONTINUE_SEARCH;
		}
	    }

	    /* list #i overshot: it becomes the new furthest position */
	    if (current_line[i] > max) {
		max = current_line[holdsmax = i];
	    }
	}

	/* MAX never moved, so every list is sitting on that same line */
        if (startmax == max)
	{
	    /* (a "log_fd may be used uninitialized" warning may be
	       reported for the macro expansion below.) */
	    unsigned len;
	    do_actual_check(slot->file->v, max, len);

	    /* skip past the one just done */
	    for (i = 0; i < used; i++) {
		soft_assert(current_line[i] == max);
		if (current_line[i] = NEXTLINE(i),
		    current_line[i] == NULL_POINTER)
		    return CONTINUE_SEARCH;
	    }
	}

    }
    return CONTINUE_SEARCH;
}

/*
 * Run the current search (lookup.search[] / lookup.patterns) against
 * lookup.slot, or, when lookup.slot is a combo slot, against each of its
 * member slots in turn.
 *
 * Before searching, the regexec flags are set from lookup.flag.regex_debug
 * and the per-search state is reset: the print limit, the match counters,
 * the saved-line list and apply_regex_abort.  Afterwards the previous
 * regexec flags are restored and, if the search was aborted through
 * apply_regex_abort, "<<aborted>>" is printed.
 *
 * For a combo slot, the member files stop being searched as soon as the
 * search has been aborted or the print limit has been used up.  (The
 * limit counter reaching zero ends the search of one file; since zero
 * also means "no limit", going on to the next file would print it
 * without any limit at all.)
 */
void apply_regex(void)
{
    unsigned old_regexec_flags =
	regexec_setflags(lookup.flag.regex_debug ? REGEXEC_DEBUG : 0);
    int limit_active;	/* true if this search has a print limit */


    /*
     * If setting lookup.lines_to_print_this_time to anything nonzero, add
     * one extra. This is because the number internally means "the line
     * to abort on", so if we want to print 10 lines, we must abort on #11.
     */
    lookup.lines_to_print_this_time =
	lookup.max_lines_to_print ? (lookup.max_lines_to_print + 1) : 0;
    limit_active = (lookup.lines_to_print_this_time != 0);
 #ifdef HAVE_SPINNER
    /*
     * NOTE(review): this literal is empty here, yet char_count says there
     * are 4 frames and SPINNER_STUFF reads two bytes per frame.  The frame
     * characters were probably lost from this copy of the source; restore
     * them before relying on HAVE_SPINNER.
     */
    lookup.spinner.chars = (String *)"";
    lookup.spinner.char_count = 4;
 #endif /* HAVE_SPINNER */
    lookup.count.filtered = 0;
    lookup.count.checked = 0;
    lookup.count.matched = 0;
    lookup.count.printed = 0;
    lookup.count.nonword = 0;
    lookup.list.overflow = 0;
    apply_regex_abort = 0;
    lookup.list.used = 0;

    if (!COMBO(lookup.slot))
	apply_regex_to_file(lookup.slot);
    else
    {
	unsigned int i;
	for (i = 0; i < lookup.slot->combo.entries; i++)
	{
	    struct slot_info *slot =
		lookup.slot_info[lookup.slot->combo.entry[i]];

	    kibishii_assert(!COMBO(slot));

	    /*
	     * A flag is on for the member only if it is on both in the
	     * combo's current flags and in the member's own defaults.
	     * NOTE(review): `fuzz' is not set here although
	     * apply_regex_to_file consults slot->current_flag.fuzz --
	     * presumably intentional; confirm.
	     */
            #define setflag(FLAG) slot->current_flag.FLAG = \
	        (lookup.slot->current_flag.FLAG & slot->default_flag.FLAG)

	    setflag(word);
	    setflag(fold);
	    setflag(highlight);
	    setflag(filter);
	    setflag(modify);
	    setflag(display);

	    /* special case for tag... goes on if either flag on */
	    kibishii_assert(!slot->default_flag.tag || slot->tag_string);
            slot->current_flag.tag = slot->tag_string == 0 ? 0 :
		(lookup.slot->current_flag.tag || slot->default_flag.tag);
	    kibishii_assert(!slot->current_flag.tag || slot->tag_string);

	    apply_regex_to_file(slot);

	    /* aborted from outside: no point starting on another file */
	    if (apply_regex_abort)
		break;

	    /*
	     * Print limit used up: stop here.  The counter is now zero,
	     * which the per-line code reads as "no limit", so the
	     * remaining files would otherwise be printed in full.
	     */
	    if (limit_active && lookup.lines_to_print_this_time == 0)
		break;
	}
    }

    regexec_setflags(old_regexec_flags);

    if (apply_regex_abort) {
	output_pager_reset_more();
        output("<<aborted>>\n");
    }
}
