/* Awk front end for NoSQL tables that sets the field
 * separator to "\t" and presets variables named with
 * the column names in the input table to be their
 * corresponding column numbers.  This requires that
 * column names be valid awk variable names.
 *
 * Copyright (c) 2003 Micah Stetson <micah@cnm-vra.com>
 * 
 * Permission is hereby granted, free of charge, to any
 * person obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the
 * Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice
 * shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
 * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
 * TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 * 
 $Id: awktable.c,v 1.3 2005/04/08 15:29:04 carlo Exp $
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"

/* Directory holding the awk support scripts.  Only defined here when
 * the build has not already supplied a value. */
#ifndef LIBDIR
#define LIBDIR "/usr/local/nosql/lib"
#endif
/* Awk script file passed to mawk as the first -f argument whenever the
 * user supplies program files with -f.  Like LIBDIR, only defined here
 * when not already defined. */
#ifndef FINSERT
#define FINSERT (LIBDIR "/striphead.awk")
#endif

/* Awk text prepended to a program given on the command line.  Once the
 * C escapes are removed it reads:
 *   BEGIN{FS=OFS="\t"}  /^\001/{NR-=1;FNR-=1;next}
 * i.e. use tab as input and output field separator, and for any line
 * that starts with the \001 byte, decrement NR and FNR and skip to the
 * next line. */
#define INSERT "BEGIN{FS=OFS=\"\\t\"}/^\\001/{NR-=1;FNR-=1;next}"

/*
 * Write the two accepted invocation forms (inline program text, or
 * program files given with -f) to stderr, substituting argv0 for the
 * command name, then terminate with exit status 1.  Does not return.
 */
void
usage(void)
{
	/* Form 1: the awk program is the first non-option argument. */
	fprintf(stderr,
		"Usage: %s [-pH] [-i input] [-W option] [-v var=value]"
		" [--] 'program text' [arg ...]\n",
		argv0);

	/* Form 2: the awk program comes from one or more -f files. */
	fprintf(stderr,
		"    or %s [-pH] [-i input] [-W option] [-v var=value]"
		" -f program-file [--] [arg ...]\n",
		argv0);

	exit(1);
}

/*
 * Build a mawk command line for a NoSQL table and exec it.
 *
 * The first input line is the table header: tab-separated column
 * names, each prefixed with the \001 byte.  For every column a
 * "-v name=N" argument is generated (N is the 1-based column number),
 * followed by "--", the awk program (unless -f was used) and the
 * remaining command-line arguments.
 *
 * Options:
 *   -f file   awk program file, may be repeated; FINSERT is passed
 *             to mawk as a program file ahead of the first one
 *   -H        copy the header line to stdout before running mawk
 *   -i input  reopen stdin on the named file
 *   -p        print the resulting command line instead of running it
 *   -v a=b    passed through to mawk unchanged
 *   -W opt    passed through to mawk unchanged
 *
 * Returns 0 after -p; otherwise control only comes back from
 * execvp() on failure, which is reported through fatal().
 *
 * We allocate a fair bit on the heap but never free anything.  The
 * program is very short lived and needs everything it allocates until
 * the end, so we rely on the system to release the memory at exit.
 */
int
main(int argc, char **argv)
{
	Strlist *args;
	FILE *in = stdin;
	char **f;
	char *l;
	int ncol, nf, i;
	int fflag = 0, header = 0, justprint = 0;

	args = strlist();
	slappend(args, "mawk");

	ARGBEGIN{
	case 'f':
		if (!fflag) {
			slappend(args, "-f");
			slappend(args, FINSERT);
			fflag = 1;
		}
		slappend(args, "-f");
		slappend(args, EARGF(usage()));
		break;
	case 'H':
		header = 1;
		break;
	case 'i':
		in = efreopen(EARGF(usage()), "r", stdin);
		break;
	case 'p':
		justprint = 1;
		break;
	case 'v':
		slappend(args, "-v");
		slappend(args, EARGF(usage()));
		break;
	case 'W':
		slappend(args, "-W");
		slappend(args, EARGF(usage()));
		break;
	default:
		usage();
	}ARGEND

	/* Without -f the program text is a mandatory argument.  Reject a
	 * bad command line now, before touching the input, so the user
	 * gets the usage message instead of a blocked read or a
	 * misleading "not a valid table". */
	if (!fflag && *argv == NULL)
		usage();

	/* We don't want to read any more than we use,
	 * so the awk program gets all the data. */
	setvbuf(in, NULL, _IONBF, 0);

	l = frdstr(in, '\n', 1);
	if (l == NULL || l[0] != '\001')
		fatal("not a valid table");

	/* Every column name carries one \001 marker, so counting the
	 * markers gives the expected number of columns. */
	for (i=0,ncol=0; l[i] != '\0'; i++)
		if (l[i] == '\001')
			ncol++;

	/* extra column for detecting corrupt rows */
	f = emalloc((ncol+1) * sizeof(char *));

	nf = getfields(l, f, ncol+1, 0, "\t");
	if (nf != ncol)
		fatal("corrupt header line");

	for (i=0; i<nf; i++) {
		int n;
		char *s;

		if (f[i][0] != '\001')
			fatal("corrupt header line");

		/* Size the "name=N" string first, then format it for real;
		 * f[i]+1 skips the \001 marker. */
		n = snprintf(NULL, 0, "%s=%d", f[i]+1, i+1);
		if (n < 0)
			fatal("couldn't format column variable");
		s = emalloc(n+1);
		snprintf(s, n+1, "%s=%d", f[i]+1, i+1);
		slappend(args, "-v");
		slappend(args, s);

		/* -H echoes the header verbatim, markers included. */
		if (header)
			printf("%s%s", i!=0 ? "\t" : "", f[i]);
	}
	if (header)
		printf("\n");

	slappend(args, "--");
	if (!fflag) {
		/* Inline program: prefix it with INSERT. */
		size_t ilen = strlen(INSERT);
		size_t plen = strlen(*argv);
		char *prog = emalloc(ilen + plen + 1);

		memcpy(prog, INSERT, ilen);
		memcpy(prog + ilen, *argv, plen + 1);	/* +1 copies the NUL */
		slappend(args, prog);
		argv++;
	}
	for (; *argv != NULL; argv++)
		slappend(args, *argv);
	slappend(args, NULL);

	if (justprint) {
		char **a;
		for (a=sl2vec(args); *a != NULL; a++)
			printf("%s ", *a);
		printf("\n");
	} else {
		/* Push out any buffered header before the process image
		 * is replaced. */
		fflush(stdout);
		execvp("mawk", sl2vec(args));
		fatal("couldn't exec() %s:", "mawk");
	}

	return 0;
}
