/* PSPP - computes sample statistics.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   Written by Ben Pfaff <blp@gnu.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA. */

#include <config.h>
#include <stdio.h>
#include <math.h>
#include "common.h"
#include "error.h"
#include "misc.h"
#include "str.h"
#include "lexer.h"
#include "var.h"

#undef DEBUGGING
/*#define DEBUGGING 1 */
#include "debug-print.h"

/* Kinds of sampling that the SAMPLE command can request. */
enum
  {
    /* "n FROM N" form: draw a fixed number of cases, e.g. 5 FROM 10. */
    TYPE_A_FROM_B = 0,

    /* Fractional form: keep each case with a fixed probability,
       e.g. 0.5. */
    TYPE_FRACTION = 1
  };

/* State carried by one SAMPLE transformation.

   The header must remain the first member because the structure is
   passed around cast to any_trns *. */
typedef struct
  {
    trns_header h;		/* Common transformation header (proc, free). */
    int type;			/* TYPE_A_FROM_B or TYPE_FRACTION. */

    /* Used only when type == TYPE_A_FROM_B. */
    int n;			/* Number of cases to select. */
    int N;			/* Declared size of the population. */
    int m;			/* Cases selected so far. */
    int t;			/* Cases examined so far. */

    /* Used only when type == TYPE_FRACTION. */
    int frac;			/* Sampling fraction scaled by 65536. */
  }
sample_trns;

/* Per-case procedure that cmd_sample() installs for SAMPLE. */
int sample_trns_proc (any_trns *trns, ccase *c);

/* Parses the SAMPLE command and installs the matching transformation.

   Two syntaxes are accepted:

     SAMPLE fraction.    A non-integer number with 0 < fraction < 1.
     SAMPLE n FROM N.    Integers with 0 < n < N.

   Returns 1 on success.  When a value is out of range or the command
   is not properly terminated, returns the result of msg() or
   syntax_error() and installs no transformation.

   NOTE(review): match_id, force_num, force_match_id and force_int are
   defined outside this file; they look like lexer macros that handle
   their own failure paths -- confirm in lexer.h. */
int
cmd_sample (void)
{
  sample_trns *trns;

  int type;
  int a, b;
  int frac;

  match_id (SAMPLE);

  force_num ();
  if (tokint == NOT_LONG)
    {
      /* The number is not an integer, so it is a sampling fraction. */
      type = TYPE_FRACTION;
      if (tokval <= 0 || tokval >= 1)
	return msg (SE, _("The sampling factor must be between 0 and 1 "
		    "exclusive. "));

      /* Store the fraction as a threshold out of 65536 so that the
         per-case test is a plain integer comparison. */
      frac = (int) (tokval * 65536);
      a = b = 0;
    }
  else
    {
      type = TYPE_A_FROM_B;
      a = tokint;
      get_token ();
      force_match_id (FROM);
      force_int ();
      b = tokint;

      /* The sample size must be positive and smaller than the
         population.  A non-positive size would make the transformation
         silently drop every case. */
      if (a <= 0 || a >= b)
	return msg (SE, _("Cannot sample %d observations from a population of "
		    "%d."), a, b);
      frac = 0;
    }
  get_token ();

  /* Check the command terminator before registering anything, so that
     a command rejected for bad syntax leaves no transformation
     behind. */
  if (token != '.')
    return syntax_error (_("expecting end of command"));

#if DEBUGGING
  if (type == TYPE_FRACTION)
    printf ("SAMPLE %g.\n", frac / 65536.);
  else
    printf ("SAMPLE %d FROM %d.\n", a, b);
#endif

  trns = xmalloc (sizeof *trns);
  trns->h.proc = sample_trns_proc;
  trns->h.free = NULL;		/* No free routine is supplied. */
  trns->type = type;
  trns->n = a;
  trns->N = b;
  trns->m = trns->t = 0;
  trns->frac = frac;
  add_transformation ((any_trns *) trns);

  return 1;
}

/* Decides whether the current case belongs to the sample.

   TRNS is the sample_trns set up by cmd_sample(); the case C itself
   is not examined.

   Returns -1 when the case is selected and -2 when it is not.
   NOTE(review): presumably these are the transformation driver's
   codes for "continue" and "drop this case" -- confirm against the
   code that invokes transformations. */
int
sample_trns_proc (any_trns * trns, unused ccase * c)
{
  sample_trns *t = (sample_trns *) trns;
  double U;

  /* Fractional sampling: the low 16 bits of shuffle() are a value in
     0...65535, and exactly `frac' of those values lie strictly below
     the threshold, giving a selection probability of frac / 65536.
     (Using <= here would accept frac + 1 values and bias the sample
     upward.) */
  if (t->type == TYPE_FRACTION)
    return (shuffle () & 0xffff) < t->frac ? -1 : -2;

  /* n FROM N sampling.  Once n cases have been chosen, reject the
     rest. */
  if (t->m >= t->n)
    return -2;

  /* Selection sampling (Knuth's Algorithm S): choose the current case
     with probability (n - m) / (N - t), where m cases have been
     selected out of the t examined so far.
     NOTE(review): assumes rand_uniform (1) yields a uniform deviate
     in [0, 1) -- confirm. */
  U = rand_uniform (1);
  if ((t->N - t->t) * U >= t->n - t->m)
    {
      /* Not selected. */
      t->t++;
      return -2;
    }

  /* Selected. */
  t->m++;
  t->t++;
  return -1;
}
