#include <features.h>
#if !defined __GLIBC__ || __GLIBC__ < 2
#error "Need Berekely libdb header v 1.85"
#endif
#if __GLIBC__ == 2 && __GLIBC_MINOR__ == 0
#include <db.h>
#else
#include <db1/db.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <signal.h>
#include <string.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>

#include "indexl.h"

/*
 * hash_hash: hash function installed in the HASHINFO passed to dbopen().
 *
 * Folds the len bytes at key into an unsigned accumulator that starts at 0:
 * for each byte the accumulator is multiplied by 16777619 and then XORed
 * with the byte.  Bytes are read through a plain `char` pointer, so on
 * platforms where char is signed a byte >= 0x80 is sign-extended before the
 * XOR.  That is kept deliberately: altering it would change the values this
 * function returns for such keys.
 *
 * Returns the accumulator (0 when len is 0).
 */
static unsigned
hash_hash(const void *key, unsigned len) {

  const char *bytes = key;
  unsigned acc = 0;
  unsigned i;

  for (i = 0; i < len; i++) {
    acc = acc * 16777619;
    acc = acc ^ bytes[i];
  }

  return acc;

}

/*
 * H: the HASHINFO handed to dbopen() by hash_open().  Only the hash-function
 * slot is populated (with hash_hash); every other member starts at zero.
 * hash_open() and hash_opt() temporarily overwrite members of H (cachesize,
 * bsize, ffactor, nelem) around a dbopen() call, so H is shared mutable state.
 *
 * Totals_key: the one-byte key (value 0) under which hash_get_totals() and
 * hash_put_totals() read and write the Hash_totals record.
 */
static HASHINFO H={0,0,0,0,hash_hash,0};
static const unsigned char Totals_key=0;

/*
 * hash_open: open the DB_HASH file s and attach it to h.
 *
 *   h    handle to fill in; h->d receives the DB pointer, h->n a private
 *        copy of s (released by hash_close()).
 *   s    file name passed straight to dbopen().  May be NULL, in which case
 *        no name is stored (h->n is NULL); other functions in this file
 *        already guard against a NULL h->n.
 *   new  non-zero: create/truncate the file read-write and request a
 *        1000000-byte cache for the open; zero: open an existing file
 *        read-only.
 *
 * Returns 1 on success.  On failure an error is logged, h->d and h->n are
 * both NULL, and 0 is returned.  H.cachesize is reset to 0 on every path so
 * a failed open cannot leak the temporary cache setting into later opens.
 */
int
hash_open(Hash *h,const char *s,int new) {

  if (new)
    H.cachesize=1000000;

  h->d=dbopen(s,new ? O_CREAT|O_RDWR|O_TRUNC : O_RDONLY,0700,DB_HASH,&H);

  /* The cache request applies to this open only. */
  H.cachesize=0;

  if (!h->d) {
    h->n=NULL;
    err("Can't open %s as %s\n",s ? s : "(null)",new ? "new file" : "existing file");
    return 0;
  }

  if (!s) {
    h->n=NULL;
    return 1;
  }

  if (!(h->n=strdup(s))) {
    /* Without the name the handle is only half usable; back out. */
    err("Can't save name of hash %s\n",s);
    h->d->close(h->d);
    h->d=NULL;
    return 0;
  }

  return 1;

}

/*
 * hash_close: close the database attached to h and release its stored name.
 *
 * If h->d is non-NULL its close routine is called; when that reports
 * failure an error is logged and 0 is returned without this function
 * modifying *h.  Otherwise h->n is freed, the whole structure is zeroed and
 * 1 is returned.  A handle whose d is already NULL is simply cleared.
 */
int
hash_close(Hash *h) {

  DB *db=h->d;

  if (db) {
    if (db->close(db)) {
      err("Can't close hash %p\n",db);
      return 0;
    }
  }

  /* free(NULL) is a no-op, so no separate check on h->n is needed. */
  free((void *)h->n);
  memset(h,0,sizeof(*h));

  return 1;

}

/*
 * hash_get_totals: look up the record stored under Totals_key.
 *
 * Returns the data pointer that the get routine placed in the result DBT,
 * viewed as a Hash_totals.  Returns NULL when get reports 1 (silently) or
 * any other non-zero status (after logging an error naming h->n).
 *
 * NOTE(review): the returned pointer comes from the database layer, not
 * from an allocation made here; presumably it is only valid until the next
 * operation on the same DB -- confirm against the dbopen(3) contract.
 */
Hash_totals *
hash_get_totals(Hash *h) {

  DBT key={(void *)&Totals_key,sizeof(Totals_key)};
  DBT val;
  DB *db=h->d;
  int rc=db->get(db,&key,&val,0);

  if (rc==0)
    return (Hash_totals *)val.data;

  /* Status 1 is returned quietly; anything else is reported. */
  if (rc!=1)
    err("hash get totals error %s\n",h->n);

  return NULL;

}

/*
 * hash_put_totals: store *ht (sizeof(*ht) bytes) under Totals_key.
 *
 * Returns 1 when the put routine reports 0.  Any other status logs an error
 * naming h->n and returns 0; status 1 gets its own "Unexplained" message
 * because put is called with flags 0.
 */
int
hash_put_totals(Hash *h,Hash_totals *ht) {

  DBT key={(void *)&Totals_key,sizeof(Totals_key)};
  DBT val={ht,sizeof(*ht)};
  DB *db=h->d;
  int rc=db->put(db,&key,&val,0);

  if (rc==0)
    return 1;

  if (rc==1)
    err("Unexplained hash put totals error: %s\n",h->n);
  else
    err("hash put totals error %s\n",h->n);

  return 0;

}

/*
 * hash_get: look up the record whose key is *g.
 *
 * g is passed to the get routine cast to DBT *, so Gen is assumed to be
 * layout-compatible with DBT (NOTE(review): confirm in indexl.h).
 *
 * Returns the data pointer produced by get, viewed as a Datum, or NULL when
 * get reports 1 (silently) or any other non-zero status (after logging).
 */
Datum *
hash_get(Hash *h,const Gen *g) {

  DBT val;
  DB *db=h->d;
  int rc=db->get(db,(DBT *)g,&val,0);

  if (rc==0)
    return (Datum *)val.data;

  if (rc!=1)
    err("hash get error\n");

  return NULL;

}  

/*
 * hash_seq: advance the sequential cursor (R_NEXT) to the next record whose
 * data is exactly sizeof(Datum) bytes long.
 *
 * Records of any other size are skipped.  On success the key is written
 * into *g (passed to the seq routine cast to DBT *) and the record's data
 * pointer is returned as a Datum.  Returns NULL when seq reports 1
 * (silently) or any other non-zero status (after logging).
 */
Datum *
hash_seq(Hash *h,Gen *g) {

  DBT val;
  DB *db=h->d;
  int rc;

  for (;;) {
    rc=db->seq(db,(DBT *)g,&val,R_NEXT);
    if (rc!=0)
      break;
    if (val.size==sizeof(Datum))
      return (Datum *)val.data;
    /* Wrong-sized record: keep scanning. */
  }

  if (rc!=1)
    err("hash seq error\n");

  return NULL;
    
}

/*
 * hash_put: store *n (sizeof(*n) bytes) under the key *g.
 *
 * g is passed to the put routine cast to DBT *.  Returns 1 when put reports
 * 0; otherwise logs an error and returns 0.  Status 1 gets its own
 * "Unexplained" message because put is called with flags 0.
 */
int
hash_put(Hash *h,const Gen *g,Datum *n) {

  DBT val={n,sizeof(*n)};
  DB *db=h->d;
  int rc=db->put(db,(DBT *)g,&val,0);

  if (rc==0)
    return 1;

  if (rc==1)
    err("Unexplained hash put error\n");
  else
    err("hash put error\n");

  return 0;

}

/*
 * hash_sync: call the database's sync routine with flags 0.
 *
 * Returns 1 when sync reports 0; otherwise logs "db sync error" and
 * returns 0.
 */
int
hash_sync(Hash *h) {

  DB *db=h->d;

  if (db->sync(db,0)) {
    err("db sync error\n");
    return 0;
  }

  return 1;

}

/*
 * hash_perf: time ss random key lookups against h and report the result
 * through inf().
 *
 * Every key delivered by hash_seq() is first copied, NUL-terminated, into
 * the map b, and a pointer to each copy is appended to the map k.  Then ss
 * keys are picked at random from k and looked up with hash_get(); the
 * elapsed wall-clock time comes from gettimeofday().  Keys are re-measured
 * with strlen(), so a key containing an embedded NUL would be truncated.
 *
 * Returns 1 on success, 0 if the totals record is missing, a map operation
 * fails, lookups were requested but no keys were collected, or a lookup
 * fails.
 *
 * NOTE(review): b and k are sized from ht->nc and ht->nw (presumably total
 * key characters and key count -- confirm in indexl.h) and are not released
 * here; no release routine is visible in this file.
 */
int
hash_perf(Hash *h,unsigned ss) {

  unsigned j,n,nk;
  static char nn;
  struct timeval tv,tv1;
  float f;
  Map b={0},k={0};
  const char z=0,**k1,**kk,**ke;
  Gen g;
  Hash_totals *ht;

  if (!(ht=hash_get_totals(h)))
    return 0;

  if (!map_get(&b,0,(ht->nc+ht->nw)*sizeof(char)))
    return 0;

  if (!map_get(&k,0,ht->nw*sizeof(char *)))
    return 0;

  for (;hash_seq(h,&g);) 
    if (!map_write_element(&k,b.m) ||
	!map_write_gen(&b,g) ||
	!map_write_element(&b,z))
      return 0;

  /* Seed the generator once per process. */
  if (!nn) {

    time_t tt;

    srand((unsigned)time(&tt));
    nn=1;
  }

  gettimeofday(&tv,NULL);

  k1=k.m1;
  ke=k.m;
  nk=ke-k1;

  /* With no keys there is nothing to index; the old code read k1[-1]. */
  if (ss && !nk) {
    err("No keys to retrieve in hash_perf\n");
    return 0;
  }

  for (n=0;n<ss;n++) {

    /* Scale in double so nk*rand() cannot overflow an integer type. */
    j=(unsigned)((double)nk*rand()/RAND_MAX);
    if (j>=nk)          /* rand()==RAND_MAX lands one past the end */
      j=nk-1;
    kk=k1+j;

    g.v=(void *)*kk;
    g.w=strlen(*kk);
    if (!hash_get(h,&g)) {
      /* '*' width/precision arguments must be int. */
      err("Can't retrieve %*.*s\n",(int)g.w,(int)g.w,(const char *)g.v);
      return 0;
    }

  }

  gettimeofday(&tv1,NULL);
  f=(tv1.tv_sec-tv.tv_sec)+1.0e-6*(tv1.tv_usec-tv.tv_usec);

  inf("done\n%d retrievals in %e sec, %e usec/retrieval\n",
	 n,f,n ? 1.0e6*f/n : 0.0);

  return 1;

}

/*
 * hash_opt: rebuild the hash file behind h with a bucket size and fill
 * factor derived from the stored Hash_totals record.
 *
 * Steps: sync h and report its file size; derive bsize/ffactor from the
 * totals; rename the current file to ".hash.old" (relative to the current
 * directory); create a fresh file under the original name via hash_open()
 * with the tuned HASHINFO; copy every record returned by hash_seq() plus
 * the totals record; close the old handle, replace *h with the new one,
 * delete ".hash.old", sync and report the new size.
 *
 * The tuned parameters are installed in the global H only for the duration
 * of the hash_open() call and H is restored on every path.  If anything
 * fails after the rename, the half-built handle is closed and ".hash.old"
 * is renamed back so the original file is not left stranded.
 *
 * Returns 1 on success, 0 on any failure (an error is logged).
 *
 * NOTE(review): ht->nc / ht->nw is used as an average per-key size and
 * ht->ms as a maximum size -- presumably character count, key count and
 * longest key; confirm in indexl.h.
 */
int
hash_opt(Hash *h) {

  unsigned as,ms1;
  Hash h1;
  Datum *n;
  struct stat ss;
  HASHINFO tuned,saved;
  Hash_totals *ht;
  Gen g;
  int renamed=0,opened=0;

  if (!hash_sync(h))
    return 0;

  ss.st_size=0;
  if (h->n && stat(h->n,&ss)) {
    err("Can't stat %s\n",h->n);
    return 0;
  }

  inf("Current size: %ld\n",(long)ss.st_size);

  if (!(ht=hash_get_totals(h)))
    return 0;

  /* An empty table has nw==0; treat its average key size as 0. */
  as=2*sizeof(DBT)+sizeof(Datum)+(ht->nw ? ht->nc/ht->nw : 0)+1;
  ms1=ht->ms+1+2*sizeof(DBT)+sizeof(Datum);

  /*
   * Work on a private copy so the global H is untouched if we bail out.
   * bsize: smallest power-of-two multiple of 64 that holds the largest
   * record and two average records; ffactor: power of two >= bsize/as.
   */
  tuned=H;
  for (tuned.bsize=64;tuned.bsize<ms1;tuned.bsize+=tuned.bsize);
  for (;tuned.bsize<2*as;tuned.bsize+=tuned.bsize);
  for (tuned.ffactor=2;tuned.ffactor<tuned.bsize/as;tuned.ffactor+=tuned.ffactor);
  tuned.nelem=1;
  /* hash_open() picks the cache size itself when creating a file. */
  tuned.cachesize=0;
  inf("New hash: %d %d %d %d\n",tuned.bsize,tuned.ffactor,tuned.nelem,tuned.cachesize);

  if (h->n) {
    if (rename(h->n,".hash.old")) {
      err("Can't rename %s to .hash.old\n",h->n);
      return 0;
    }
    renamed=1;
  }

  saved=H;
  H=tuned;
  opened=hash_open(&h1,h->n,1);
  H=saved;

  if (!opened)
    goto fail;

  while ((n=hash_seq(h,&g))) 
    if (!hash_put(&h1,&g,n))
      goto fail;
  
  /* Re-fetch: the earlier pointer may not have survived the scan above. */
  if (!(ht=hash_get_totals(h)))
    goto fail;
  if (!hash_put_totals(&h1,ht))
    goto fail;

  /* hash_open() already stored a copy of the name in h1.n. */

  if (!hash_close(h))
    goto fail;

  *h=h1;

  if (renamed && unlink(".hash.old")) {
    err("Can't unlink .hash.old in hash_opt\n");
    return 0;
  }
  
  if (!hash_sync(h))
    return 0;

  ss.st_size=0;
  if (h->n && stat(h->n,&ss)) {
    err("Can't stat %s\n",h->n);
    return 0;
  }

  inf("Final size: %ld\n",(long)ss.st_size);

  return 1;

 fail:
  /* Drop the partial copy and put the original file back under its name. */
  if (opened)
    hash_close(&h1);
  if (renamed && rename(".hash.old",h->n))
    err("Can't rename .hash.old back to %s\n",h->n);

  return 0;

}

