#!/usr/bin/mawk -We
# *********************************************************************
#  Written by and copyright Carlo Strozzi <carlos@linux.it>.
#
#  row: select table rows matching a given expression that refers to
#       column names.
#  Copyright (C) 1998-2001 Carlo Strozzi <carlos@linux.it>
# 
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
# 
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
# 
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  2001-01-03 Ported to NoSQL v3
#  2001-02-02 Fixed a bug in parser() 
#  2001-04-17 Added inline help
#  2001-08-17 Added stdio portability
#
#  $Id$
# *********************************************************************

# Command-line handling.
#
# Recognised switches set global flags that the NR == 1 rule reads when
# it builds the back-end program; every other argument is appended to
# awk_program, the user's row-selection expression. On the way out ARGV
# is reduced to the optional input file so that awk does not try to open
# the switches or the expression as input files.
BEGIN {
  NULL = ""; FS = OFS = "\t"; srand()

  # Walk ARGV by index up to ARGC instead of stopping at the first empty
  # string: an empty argument (for instance an unset shell variable
  # passed as "$VAR") must not silently discard everything after it.
  while (++i < ARGC) {
    if (ARGV[i] == NULL) continue		# Nothing to parse.
    if (ARGV[i] == "-x" || ARGV[i] == "--debug") debug = 1
    else if (ARGV[i] == "-N" || ARGV[i] == "--no-header") no_hdr = 1
    else if (ARGV[i] == "-e" || ARGV[i] == "--not-empty") not_empty = 1
    else if (ARGV[i] == "-t" || ARGV[i] == "--test") {
      # Test mode implies first-match and header suppression.
      test_only = 1 ; first = 1 ; no_hdr = 1
    }
    else if (ARGV[i] == "-f" || ARGV[i] == "--first-match") first = 1
    else if (ARGV[i] == "-K" || ARGV[i] == "--key") key_only = 1
    else if (ARGV[i] == "-a" || ARGV[i] == "--add") {
       # The option value (possibly empty) is the next argument.
       add_row = 1; new_key = ARGV[++i]
       gsub(/\t/, "\\t", new_key)
       gsub(/\n/, "\\n", new_key)
       # Escaping must be done twice, as the result will further
       # be eval'ed by the back-end awk.
       new_key = cMawkBug(new_key)
       new_key = cMawkBug(new_key)
    }
    else if (ARGV[i] == "-i" || ARGV[i] == "--input") i_file = ARGV[++i]
    else if (ARGV[i] == "-o" || ARGV[i] == "--output") o_file = ARGV[++i]
    else if (ARGV[i] == "-h" || ARGV[i] == "--help") {
       system("grep -v '^#' @NOSQLPATH@/nosql/help/row.txt")
       # rc is checked again by the END rule, which still runs after
       # an exit issued from BEGIN.
       rc = 1
       exit(rc)
    }
    else awk_program = awk_program " " ARGV[i]
  }
  if (awk_program == NULL)  exit		# No expression given.

  ARGC = 1					# Fix argv[]

  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }
}

# First input line (the column-name header): build the back-end awk
# command in awkpgm. Reads the flags set in BEGIN (add_row, new_key,
# not_empty, no_hdr, key_only, test_only, first, o_file, debug) and the
# user's expression in awk_program; fills col_names[] and awk_col[],
# which parser() uses to map column names to "$(N)" field references.
NR == 1 {		# Start building the back-end awk program.

  if (add_row) {
     # Template for the row printed when nothing matched: the header
     # with everything but the tabs removed, i.e. one empty value per
     # column, with new_key (if any) in the first column.
     empty_row = $0
     gsub(/[^\t]/, NULL, empty_row)
     if (new_key != NULL) {
        sub(/\t/, new_key "\t", empty_row)	# If multiple fields.
        sub(/^$/, new_key, empty_row)		# If single-field table.
     }
  }

  awkpgm = "BEGIN{FS=OFS=\"\\t\";}"

  # Skip input lines made only of blanks and tabs.
  if (not_empty) awkpgm = awkpgm "$0~/^[ \t]+$/{next}"

  # Do not let duplicated input columns fool us.
  # (Only the first occurrence of a name is recorded.)
  for (col = 1; col <= NF; col++) {
    if (col_names[$(col)] == NULL) {
      col_names[$(col)] = $(col)
      awk_col[$(col)] = "$(" col ")"
    }
  }

  # The first two input lines are passed through or dropped without
  # being tested against the expression.
  if (no_hdr) awkpgm = awkpgm " NR<3{next}"
  else {
     if (key_only) awkpgm = awkpgm " NR<3{print $1;next}"
     else awkpgm = awkpgm " NR<3{print;next}"
  }

  # Main rule: the translated expression is the pattern, and the action
  # counts matches in _nosql_nr and emits the requested output.
  awkpgm = awkpgm parser(awk_program) "{_nosql_nr++;"
  if (test_only) awkpgm = awkpgm "_nosql_ok=1;"
  else if (key_only) awkpgm = awkpgm "print $1;"
  else awkpgm = awkpgm "print;"
  if (first) awkpgm = awkpgm "exit"
  awkpgm = awkpgm "}END{"
  if (test_only) awkpgm = awkpgm "print (_nosql_ok/1);"
  # NOTE(review): empty_row is embedded as-is inside a double-quoted awk
  # string within a single-quoted shell word; a key containing ", ' or &
  # does not look safe here -- confirm and escape where new_key is built.
  if (add_row) awkpgm = awkpgm "if(!_nosql_nr) print \"" empty_row "\";"
  awkpgm = "mawk '" awkpgm "}'"

  # Let's save one concurrent process and a couple of msec.
  awkpgm = "exec " awkpgm

  if (o_file != NULL) {
    # Single-quote the file name for sh(1) so that blanks and shell
    # metacharacters in it are taken literally. An embedded single quote
    # is written as '\'' (close quote, escaped quote, reopen quote).
    # split()/concatenation is used instead of gsub() to stay clear of
    # the replacement-backslash differences between awk implementations.
    n_parts = split(o_file, o_parts, "'")
    o_quoted = o_parts[1]
    for (p = 2; p <= n_parts; p++) o_quoted = o_quoted "'\\''" o_parts[p]
    awkpgm = awkpgm " > '" o_quoted "'"
  }

  if (debug) {
    print "\n" awk_program "\n" awkpgm "\n" > "@STDERR@"
  }
}

# Hand every input line, header lines included, to the back-end command.
{
  print $0 | awkpgm
}

END {
   # A status already set earlier (rc) wins; otherwise close the pipe
   # and hand back whatever close() reports for the back-end mawk(1).
   if (!rc) rc = close(awkpgm)
   exit(rc)
}

#
# Function section.
#

# *********************************************************************
# parser(string)
# *********************************************************************

# Translate a user expression written in terms of column names into awk
# source for the back-end interpreter.
#
#   in_pgm   the expression text, as assembled from the command line.
#   returns  the same text with every identifier that has an entry in
#            the global awk_col[] replaced by that entry (a "$(N)" field
#            reference); single quotes are rewritten as \047 so that the
#            result can sit inside a single-quoted sh(1) word.
#
# Text inside double-quoted strings and inside /regexp/ literals is
# copied through unchanged. A "/" opens a regexp only when it is the
# operand of a preceding "~"; otherwise it is the divide operator.
# All parameters after in_pgm are local variables.
function parser(in_pgm,		i, var_name, p_length, \
				a, quoted, escaped, slashed, \
				tilde, out_pgm, c) {

  p_length = split(in_pgm, a, "")

  # The next statement is necessary, to make sure the last section 
  # of the parser is entered once even after the end of the input AWK
  # program.

  p_length++

  while (++i <= p_length) {

    c = a[i]

    # Do not remove the next two lines of code. They can be *very*
    # useful to debug the parser. Simply comment them out when
    # not in use.
    #
    # "q:" stands for "quoted", "e:" for "escaped", etc.
    #
    #print "q:" quoted " e:" escaped " t:" tilde " s:" slashed \
    #    " a:" c > "@STDERR@"

    # The "/" character triggers regexp matching only after "~",
    # otherwise it stands for the divide operator.
    if (c == "~" && !escaped && !quoted && !slashed) tilde = 1

    if (c == "\\" && !escaped) {
      # Emit a pending identifier first, so it stays ahead of the
      # backslash in the output.
      if (var_name != NULL) {
        out_pgm = out_pgm column_ref(var_name) ; var_name = NULL
      }
      escaped = 1; out_pgm = out_pgm c ; continue
    }

    # A pending "~" announces a regexp literal only if the next
    # significant character is "/". When the operand turns out to be
    # anything else (a string, a name, a number) forget the "~", or a
    # later divide operator would be mistaken for a regexp delimiter.
    if (tilde && !slashed && !quoted && c !~ /[~ \t\n(\/]/) tilde = 0

    if (c == "/" && !escaped && !quoted && tilde) {
      if (slashed) tilde = slashed = 0
      else slashed = 1
    }
    if (c == "\"" && !escaped && !slashed) quoted = !quoted
    escaped = 0
    if (quoted || slashed) {
      # Entering a string or regexp right after an identifier: emit
      # the identifier before the protected text, not after it.
      if (var_name != NULL) {
        out_pgm = out_pgm column_ref(var_name) ; var_name = NULL
      }
      out_pgm = out_pgm c ; continue
    }
    if (c ~ /[A-Za-z_]/) { var_name = var_name c ; continue }
    if (var_name != NULL) {
      # Digits may continue an identifier but cannot start one.
      if (c ~ /[0-9]/) { var_name = var_name c ; continue }
      out_pgm = out_pgm column_ref(var_name)
      var_name = NULL
    }
    out_pgm = out_pgm c
  }
  gsub(/'/, "\\047", out_pgm)		# Escape single quotes for sh(1).
  return out_pgm
}

# Return the awk_col[] entry for name when there is one, else name itself.
function column_ref(name,		pos) {
  pos = awk_col[name]
  if (pos != NULL) return pos
  return name
}

# *********************************************************************
# cMawkBug(string)
#
# Takes a string and turns all '\' characters into their escaped form
# '\\'. Returns the escaped string. This could be done with just a gsub(),
# but mawk(1) has a bug that makes it behave differently from other awk
# implementations:
#
# gsub(/\\/, "\\\\", field)		# This works with both gawk(1)
#					# and the original nawk(1).
#
# gsub(/\\/, "\\\\\\", field)		# This works just with mawk(1),
#					# otherwise it produces more
#					# backslashes than necessary,
#					# which looks rather obvious.
#
# *********************************************************************
function cMawkBug(s,		pieces, count, idx, doubled) {

   # Cut the string at every backslash, then glue the pieces back
   # together with two backslashes in place of each original one.
   count = split(s, pieces, "\\")
   doubled = pieces[1]
   idx = 1
   while (++idx <= count) doubled = doubled "\\\\" pieces[idx]
   return doubled
}

#
# End of program.
#
