#!/usr/bin/mawk -We
# *********************************************************************
#  Written by and copyright Carlo Strozzi <carlos@linux.it>.
#
#  sorttable: sorts a table on one or more columns.
#  Copyright (C) 1998-2001 Carlo Strozzi <carlos@linux.it>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  2001-01-03 Ported to NoSQL v3
#  2001-04-17 Added inline help
#  2001-07-16 Fixed column-specific sort(1) options
#	     (by Barrie Stott <G.B.Stott@bolton.ac.uk>)
#  2001-08-06 Fixed handling of global sort(1) options
#  2001-08-17 Added stdio portability
#
#  $Id$
# *********************************************************************

# Command-line parsing.
#
# Recognised switches:
#   -x, --debug       set `debug`
#   -N, --no-header   set `no_hdr`
#   -i, --input  F    remember F in `i_file`
#   -o, --output F    remember F in `o_file`
#   -h, --help        print the help text and exit with status 1
#
# Any other argument starting with "-" is taken as a bundle of sort(1)
# option letters; anything else is a column name.  Results are left in:
#   command_cols[1..n]  column names, in command-line order
#   command_cols[0]     n, the number of column names
#   col_args[k]         sort(1) options given just before the k-th column
#   global_opt          sort(1) options that apply to the whole sort
BEGIN {
  NULL = ""; FS = OFS = "\t"; j = 1

  # Walk the arguments by count rather than stopping at the first empty
  # string, so that an empty argument does not silently discard every
  # argument that follows it.
  for (i = 1; i < ARGC; i++) {
    arg = ARGV[i]
    if (arg == NULL) continue

    # Test for the debug flag first.
    if (arg == "-x" || arg == "--debug") {
      debug = 1
      continue
    }
    else if (arg == "-N" || arg == "--no-header") {
       no_hdr = 1
       continue
    }
    else if (arg == "-i" || arg == "--input") {
       i_file = ARGV[++i]
       continue
    }
    else if (arg == "-o" || arg == "--output") {
       o_file = ARGV[++i]
       continue
    }
    else if (arg == "-h" || arg == "--help") {
       system("grep -v '^#' @NOSQLPATH@/nosql/help/sorttable.txt")
       rc = 1
       exit(rc)
    }

    # Separate sort(1) options from column names.
    if (arg ~ /^-/) {
      # Skip forbidden sort(1) options.  The pattern is anchored at both
      # ends: the whole argument must consist of permitted option
      # letters, because it is later pasted into a shell command line.
      if (arg !~ /^-[bcdfinru]+$/) continue

      # Separate global sort(1) options from the ones that belong to
      # the next column name (index j).
      if (arg ~ /^-[cu]/) global_opt = global_opt " " arg
      else col_args[j] = col_args[j] " " arg
    }
    else command_cols[j++] = arg
  }

  ARGC = 1					# Fix argv[]

  # NOTE(review): "@STDOUT@" and "@NOSQLPATH@" look like placeholders
  # substituted when the script is installed -- confirm.
  if (o_file == NULL) o_file = "@STDOUT@"
  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }

  # j was one past the last column; store the column count in slot 0.
  command_cols[0] = --j
  j = 0
}

#
# Main loop
#

# Column names and positions.
# Header line: map column names to positions, build the sort(1) command
# in `sort_cmd`, and (unless no_hdr is set) write the header and the
# dashline to o_file.
NR == 1 {
  while (++p <= NF) {
    # Make sure we pick the first occurrence of duplicated column
    # names (it may happen after a join).
    if (P[$p] == NULL) { P[$p] = p; N[p] = $p }
  }

  # Add list of columns, excluding the invalid ones.
  for (i = 1; i <= command_cols[0]; i++) {
    col = command_cols[i]

    # Unknown column name: remember it and move on.  bad_col is only
    # recorded here; nothing in this script reads it afterwards.
    if (P[col] == NULL) { bad_col = 1; continue }

    # Avoid passing duplicated fields to sort(1).
    if (done[col]) continue

    # col_args[i] holds the column-specific options as " -x -y ...";
    # drop every " -" separator so that all option letters end up as
    # modifiers of this key instead of leaking out as global options.
    opts = col_args[i]
    gsub(/ -/, "", opts)

    # Build a "-k POS[opts],POS" key definition for sort(1).  This is
    # the POSIX form of the obsolescent "+POS1 -POS2" notation, which
    # current sort(1) implementations no longer accept.
    sort_fields = sort_fields " -k " P[col] opts "," P[col]
    done[col] = 1
  }

  sort_cmd = "sort -t \"\t\" " global_opt " " sort_fields

  # Print header and dashline.
  if (!no_hdr) {
    # The dashline is the header with every non-tab character replaced
    # by a dash.
    print > o_file; gsub(/[^\t]/, "-"); print > o_file;

    # Make sure the header is printed before calling sort(1).
    fflush()

    # Close output file before the back-end sort(1) appends to it.
    if (o_file != "@STDOUT@") close(o_file)
  }
  next
}

# Dashline
# Second input line (the dashline): it is discarded, and the sort(1)
# command line is finalised before any body row is piped into it.
NR == 2 {
   # Let's save one concurrent process and a couple of msec.
   sort_cmd = "exec " sort_cmd

   # The header was written by awk to the literal file name, so the
   # shell must append to exactly the same name: quote it so that
   # blanks and shell metacharacters in the name are not interpreted.
   if (o_file != "@STDOUT@") sort_cmd = sort_cmd " >> " shell_quote(o_file)

   if (debug) print sort_cmd > "@STDERR@"
   next
}

# Return s wrapped in single quotes for use as one shell word; every
# embedded single quote is rewritten as '\'' .
function shell_quote(s,    n, k, part, q) {
   n = split(s, part, "'")
   q = part[1]
   for (k = 2; k <= n; k++) q = q "'\\''" part[k]
   return "'" q "'"
}

# Table body.
{
   # The rules for lines 1 and 2 end with `next`, so only body rows get
   # here; each one is fed to the sort(1) pipeline named by sort_cmd.
   print $0 | sort_cmd
}

# Final exit status.
END {
   # A status set earlier (e.g. by the help switch) takes precedence.
   if (rc) exit(rc)

   # With no body rows the sort(1) pipe was never opened; closing it
   # would report a failure even though an empty table is not an error.
   if (NR <= 2) exit(0)

   exit(close(sort_cmd))		# Return sort(1) exit status.
}

#
# End of program.
#
