/* format.c -- Reformat U.S. Gazetteer (1990) data files and build a headword index
 * Created: Mon Jul 14 09:20:33 1997 by faith@acm.org
 * Revised: Sun Jul 20 11:20:46 1997 by faith@acm.org
 * Copyright 1997 Rickard E. Faith (faith@acm.org)
 * This program comes with ABSOLUTELY NO WARRANTY.
 * 
 * $Id: format.c,v 1.5 1997/07/20 15:21:24 faith Exp $
 * 
 */

#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>

#define INCLUDE_STATE 0		/* If 1, also index "City, State".
                                   Otherwise only "City" will be indexed.
                                   This is usually clearer when
                                   searching. */

#ifdef __sparc__
extern int    atoi(char *);
extern int    printf( char *, ... );
extern int    fprintf( FILE *, char *, ... );
extern int    tolower(int);
extern int    getopt( int, char **, char * );
extern int    pclose(FILE *);
extern time_t time(time_t *);
#endif

#ifdef __linux__
#include <getopt.h>
#endif

/* Conversion modes.  |main| stores one of these in its |type| variable
   according to the -p, -m, -c, -z or -f command-line option; PLACES (0)
   is also the value used when no option is given. */
#define PLACES   0
#define MCDS     1
#define COUNTIES 2
#define ZIPS     3
#define FINAL    4

/* Stream that |entry| writes index lines to.  |main| opens it with
   popen() in FINAL mode and closes it with pclose(). */
static FILE *indexFile;
/* Running count of headwords that |entry| has written to the index. */
static int  hwtotal;

static unsigned char b64_list[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* |b64_encode| renders the low 32 bits of |val| as printable base 64
   digits taken from |b64_list|, most significant digit first.  Leading
   zero digits ('A') are dropped, but at least one digit is always
   produced.  The returned pointer refers into a static buffer that is
   overwritten by the next call. */

static const char *b64_encode( unsigned long val )
{
   static char digits[7];
   char        *first = digits;
   int         shift;
   int         pos = 0;

   /* The leading digit carries only bits 30-31; each of the remaining
      five digits carries six bits. */
   digits[pos++] = b64_list[ (val >> 30) & 0x03 ];
   for (shift = 24; shift >= 0; shift -= 6)
      digits[pos++] = b64_list[ (val >> shift) & 0x3f ];
   digits[pos] = 0;

   /* Step over leading zero digits, but never past the last digit. */
   while (first < digits + 5 && *first == b64_list[0]) ++first;
   return first;
}

/* |entry| is called immediately before the text of a record is written
   to stdout.  It samples the current stdout offset with ftell() and,
   whenever |name| differs from the headword given on the previous call,
   writes one line for that previous headword to |indexFile|:

       headword <TAB> b64(start offset) <TAB> b64(length in bytes)

   When INCLUDE_STATE is non-zero a second line keyed on
   "headword, state" is written as well.

   name   headword of the record about to be written, or NULL to flush
          the pending headword after the last record.
   state  state for the headword, or NULL if it has none.

   A separate flag records whether a headword is pending.  Testing the
   saved start offset for zero is not sufficient, because the very first
   record legitimately starts at offset 0 and would never be indexed.

   Exits with status 3 if a state change is seen for a headword that has
   no recorded state start. */
static void entry( const char *name, const char *state )
{
   static char prevName[1024]  = "";
   static char prevState[1024] = "";
   static long nameStart       = 0;
   static long stateStart      = 0;
   static int  havePrev        = 0; /* non-zero once prevName is pending */
   long        end;

   /* ftell() returns -1 if stdout is not seekable; the offsets written
      to the index are only meaningful when stdout is a regular file. */
   end = ftell(stdout);
   if (!name || strcmp(name,prevName)) {
      if (havePrev) {
         ++hwtotal;
         /* Two calls are required: b64_encode() returns a pointer into
            a single static buffer. */
         fprintf( indexFile, "%s\t%s\t", prevName, b64_encode(nameStart) );
         fprintf( indexFile, "%s\n", b64_encode(end-nameStart) );
#if INCLUDE_STATE
         if (*prevState && stateStart) {
            ++hwtotal;
            fprintf( indexFile, "%s, %s\t%s\t",
                     prevName, prevState, b64_encode(stateStart) );
            fprintf( indexFile, "%s\n", b64_encode(end-stateStart) );
         }
#endif
      }
      /* Bounded copies: the static buffers hold at most 1023 characters. */
      sprintf( prevName,  "%.1023s", name  ? name  : "" );
      sprintf( prevState, "%.1023s", state ? state : "" );
      nameStart  = end;
      stateStart = end;
      havePrev   = (name != NULL);
   } else if (state) {
      if (strcmp(state,prevState)) {
         if (!stateStart) {
            fprintf( stderr, "no stateStart\n" );
            exit(3);
         }
#if INCLUDE_STATE
         ++hwtotal;
         fprintf( indexFile, "%s, %s\t%s\t",
                  prevName, prevState, b64_encode(stateStart) );
         fprintf( indexFile, "%s\n", b64_encode(end-stateStart) );
#endif
         sprintf( prevState, "%.1023s", state );
         stateStart = end;
      }
   } else {
      /* Same headword, but this record carries no state. */
      prevState[0] = '\0';
      stateStart = 0;
   }

   /* Progress report on stderr while the count sits on a multiple of 100. */
   if (hwtotal && !(hwtotal % 100))
      fprintf( stderr, "%10d headwords\r", hwtotal );
}

/* |final| turns one line of the sorted intermediate data (the format
   printed by |places| and |zips|) into readable text on stdout, calling
   |entry| at the start of each new headword so the index stays in step
   with the output.

   Every line has the form "Name, State: <id> <fields...>", optionally
   preceded by '~'.  The id letter selects the layout:

     'Z'    one zip code for the place; consecutive 'Z' lines for the
            same name and state are joined on one "Zip code(s):" line
     'z'    a zip code record, indexed under "Zip Code <zip>"
     other  a record with type, FIPS code, population, housing units,
            land and water area, latitude and longitude

   |buffer| is modified in place (separators are overwritten with NULs).
   A line that lacks the ", " or ':' separators, or that has too few
   fields, is reported on stderr and skipped; previously such a line
   dereferenced a null pointer. */
static void final( char *buffer )
{
   static char prevName[1024]  = "";
   static char prevState[1024] = "";
   static char prevId          = 0;
   char        buf[1024];
   char        *name           = buffer;
   char        *state;
   char        *pt             = strchr(buffer, ':');
   char        *id;
   char        *type           = NULL;
   char        *placeFIPS      = NULL;
   char        *pop            = NULL;
   char        *units          = NULL;
   char        *land           = NULL;
   char        *water          = NULL;
   char        *latitude       = NULL;
   char        *longitude      = NULL;
   char        *zip            = NULL;
   char        *allocation     = NULL;
   double      lat;
   double      lon;

   if (*name == '~') ++name;
   state = strchr(name, ',');

   /* The ", " that separates name and state must come before the ':'. */
   if (!pt || !state || state + 2 > pt) {
      fprintf( stderr, "Malformed input line skipped\n" );
      return;
   }
   *state = '\0';
   state += 2;

   *pt++ = '\0';
   id = strtok( pt, " " );
   if (!id) {
      fprintf( stderr, "Malformed input line skipped\n" );
      return;
   }

   /* Split and validate every field before producing any output, so a
      rejected line leaves stdout and the index untouched. */
   if (*id == 'Z') {
      zip = strtok( NULL, " " );
      if (!zip) {
         fprintf( stderr, "Malformed input line skipped\n" );
         return;
      }
   } else {
      type       = strtok(NULL, " ");
      placeFIPS  = strtok(NULL, " ");
      pop        = strtok(NULL, " ");
      units      = strtok(NULL, " ");
      land       = strtok(NULL, " ");
      water      = strtok(NULL, " ");
      latitude   = strtok(NULL, " ");
      longitude  = strtok(NULL, " ");
      allocation = strtok(NULL, " "); /* only used by 'z' records */

      if (!type || !placeFIPS || !pop || !units || !land || !water
          || !latitude || !longitude || (*id == 'z' && !allocation)) {
         fprintf( stderr, "Malformed input line skipped\n" );
         return;
      }
   }

   /* A "Zip code(s):" line is left without a newline so further zips can
      be appended; terminate it once the place changes.  Zip code records
      are always preceded by a newline. */
   if (*id == 'z'
       || (prevId == 'Z' && (strcmp(prevName,name)
                             || strcmp(prevState,state)))) {
      printf( "\n" );
   }

   if (*id == 'Z') {
      if (!strcmp(prevName,name) && !strcmp(prevState,state)) {
         if (prevId == *id)
            printf( ", %s", zip );
         else if (prevId == 'P' || prevId == 'M')
            printf( "  Zip code(s): %s", zip );
         else {
            printf( "%s, %s\n", name, state );
            printf( "  Zip code(s): %s", zip );
         }
      } else {
         entry(name,state);
         printf( "%s, %s\n", name, state );
         printf( "  Zip code(s): %s", zip );
      }
   } else {
      lat = atof(latitude);
      lon = atof(longitude);

      if (*id == 'z') {
         /* In a 'z' record the |type| field holds the zip code itself.
            The precision keeps the result inside |buf|. */
         sprintf( buf, "Zip Code %.1000s", type );
         entry( buf, NULL );
         printf( "%s\n", buf );
         printf( "  PO Name: %s (%s)\n", name, state );
         printf( "  Location: %.5f %c, %.5f %c\n",
                 fabs(lat),
                 lat < 0 ? 'S' : 'N',
                 fabs(lon),
                 lon < 0 ? 'W' : 'E' );
         printf( "  Population (1990): %d (%0.3f%% of state)\n",
                 atoi(pop), atof(allocation) * 100 );
      } else {
         entry( name, state );
         printf( "%s, %s (%s, FIPS %d)\n",
                 name, state, type, atoi(placeFIPS) );
         /* These coordinates are scaled down by 1000000 before printing,
            and the areas below by 1000. */
         printf( "  Location: %.5f %c, %.5f %c\n",
                 fabs(lat/1000000.0),
                 lat < 0 ? 'S' : 'N',
                 fabs(lon/1000000.0),
                 lon < 0 ? 'W' : 'E' );
         printf( "  Population (1990): %d (%d housing units)\n",
                 atoi(pop), atoi(units) );
         printf( "  Area: %.1f sq km (land), %.1f sq km (water)\n",
                 atoi(land)/1000.0, atoi(water)/1000.0 );
      }
   }

   /* Remember this record so the next call can detect a change of place. */
   sprintf( prevName,  "%.1023s", name );
   sprintf( prevState, "%.1023s", state );
   prevId = *id;
}

/* |places| converts one fixed-width record into the one-line
   intermediate form "Name, State: <id> <type> <fields...>" on stdout.

   Field offsets within |buffer| (each field is followed by one
   separator column, which is overwritten with NUL):

       0   state FIPS code (unused)
       3   place FIPS code
       9   name, whose last word is taken as the type
      76   state
      79   total population (1990)
      89   number of housing units (1990)
      99   land area, thousandths of a square kilometer
     110   water area, thousandths of a square kilometer
     121   latitude, millionths of a degree, +North -South
     131   longitude, millionths of a degree, +East -West

   buffer  the record; modified in place.  Must have room for at least
           142 bytes, because buffer[141] is written.
   id      single letter copied into the output to tag the record.

   A line too short to reach the longitude field is reported on stderr
   and skipped with |buffer| untouched.  Previously the fields beyond the
   end of such a line were read from whatever the buffer still held. */
static void places( char *buffer, char id )
{
   char       *pt;
   char       *placeFIPS = &buffer[3];
   char       *name      = &buffer[9];
   char       *state     = &buffer[76];
   char       *pop       = &buffer[79];
   char       *units     = &buffer[89];
   char       *land      = &buffer[99];
   char       *water     = &buffer[110];
   char       *latitude  = &buffer[121];
   char       *longitude = &buffer[131];
   char       *type;
   const char *label;            /* type as printed after the id */
   const char *suffix    = "";   /* type word re-attached to the name */

   /* Every field start, the last being offset 131, must lie inside the
      text that was actually read. */
   if (strlen(buffer) < 131) {
      fprintf( stderr, "Short record skipped\n" );
      return;
   }

   buffer[2] = buffer[8] = buffer[75] = buffer[78] = buffer[88] = 0;
   buffer[98] = buffer[109] = buffer[120] = buffer[130] = buffer[141] = 0;

   /* Trim trailing blanks from the name and leading blanks from the
      two counts. */
   pt = name + strlen(name);
   while (pt > name && pt[-1] == ' ') *--pt = '\0';
   while (*pop == ' ') ++pop;
   while (*units == ' ') ++units;

   /* Split the last word off the name; "?" stands in when the name is
      a single word. */
   type = strrchr( name, ' ' );
   if (type) *type++ = 0;
   label = type ? type : "?";

   /* For these three types the word stays part of the headword,
      capitalized; counties and parishes are labelled in lower case. */
   if (!strcmp(label,"division")) {
      suffix = " Division";
   } else if (!strcmp(label,"County")) {
      suffix = " County";
      label  = "county";
   } else if (!strcmp(label,"Parish")) {
      suffix = " Parish";
      label  = "parish";
   }

   printf( "%s%s, %s: %c %s %s %s %s %s %s %s %s\n",
           name, suffix, state, id, label, placeFIPS,
           pop, units, land, water, latitude, longitude );
}

/* Drop the first and last character of |field| (its enclosing quote
   characters) in place and return the start of what remains.  A field
   shorter than two characters becomes the empty string. */
static char *unquote( char *field )
{
   size_t len = strlen(field);

   if (len < 2) {
      *field = '\0';
      return field;
   }
   field[len-1] = '\0';
   return field + 1;
}

/* |zips| converts one comma-separated zip code record into the
   intermediate form on stdout.

   The record has eight fields: state FIPS code, zip code, state, name,
   latitude, longitude, population and allocation.  The zip, state and
   name fields are each stripped of one enclosing character at both ends
   (presumably double quotes -- confirm against the data file).  The name
   is converted so that only the first letter of each space-separated
   word keeps its case.

   Unless the name is identical to the zip code, two lines are printed:
   a 'Z' line that attaches the zip to the place, and a '~'-prefixed 'z'
   line describing the zip code itself.  The longitude is printed with a
   leading '-'.

   |buffer| is modified in place.  A record with fewer than eight fields
   is reported on stderr and skipped; previously it dereferenced a null
   pointer.  Only a trailing line terminator is removed from the last
   field, so a final line without a newline no longer loses a digit and
   CR-LF input no longer leaves a stray '\r'. */
static void zips( char *buffer )
{
   size_t i;
   size_t len;
   char   *stateFIPS  = strtok( buffer, "," ); /* parsed only to skip it */
   char   *zip        = strtok( NULL, "," );
   char   *state      = strtok( NULL, "," );
   char   *name       = strtok( NULL, "," );
   char   *latitude   = strtok( NULL, "," );
   char   *longitude  = strtok( NULL, "," );
   char   *pop        = strtok( NULL, "," );
   char   *allocation = strtok( NULL, "," );

   if (!stateFIPS || !zip || !state || !name
       || !latitude || !longitude || !pop || !allocation) {
      fprintf( stderr, "Malformed zip record skipped\n" );
      return;
   }

   zip   = unquote(zip);
   state = unquote(state);
   name  = unquote(name);
   allocation[strcspn(allocation, "\r\n")] = 0;

   /* Lower-case every letter that does not follow a space.  The cast
      keeps tolower() defined for characters with the high bit set. */
   len = strlen(name);
   for (i = 1; i < len; i++) {
      if (name[i-1] != ' ')
         name[i] = (char)tolower((unsigned char)name[i]);
   }

   if (strcmp(name,zip)) {
      printf( "%s, %s: Z %s %s -%s %s %s\n",
              name, state, zip, latitude, longitude, pop, allocation );
      printf( "~%s, %s: z %s fips %s units land water %s -%s %s\n",
              name, state, zip, pop, latitude, longitude, allocation );
   }
}


/* Filter stdin to stdout in one of five modes selected by a
   command-line option (PLACES is used if none is given):

     -p  PLACES    fixed-width records, tagged 'P'
     -m  MCDS      fixed-width records, tagged 'M'
     -c  COUNTIES  fixed-width records, tagged 'C'
     -z  ZIPS      comma-separated zip code records
     -f  FINAL     format the intermediate lines for output and write the
                   headword index through "sort -df" into gazetteer.index

   Exit status: 0 on success, 1 on a usage error, 2 on an illegal mode,
   4 if the index pipe cannot be opened or the sort command fails. */
int main( int argc, char **argv )
{
   int        c;
   char       buffer[1024];
   int        type = PLACES;
   const char *filename = "gazetteer.index";

   while ((c = getopt(argc,argv,"pmczf")) != -1)
      switch (c) {
      case 'p': type = PLACES;   break;
      case 'm': type = MCDS;     break;
      case 'c': type = COUNTIES; break;
      case 'z': type = ZIPS;     break;
      case 'f': type = FINAL;    break;
      default:
         fprintf( stderr, "usage: format [-pmczf]\n" );
         exit(1);
      }

   if (type == FINAL) {
      time_t t;

      sprintf( buffer, "sort -df > %s", filename );
      if (!(indexFile = popen( buffer, "w" ))) {
         /* entry() writes to indexFile unconditionally, so carrying on
            without it would crash. */
         fprintf( stderr, "Cannot open %s for write\n", buffer );
         exit(4);
      }

      /* Header entries.  entry() must be called before each record's
         text is printed so that it sees the record's start offset. */
      entry( "00-database-url", NULL );
      printf( "00-database-url\n"
              "  http://ftp.census.gov/ftp/pub/tiger/tms/gazetteer/\n" );
      entry( "00-database-short", NULL );
      printf( "00-database-short\n"
              "  U.S. Gazetteer (1990)\n" );
      entry( "00-database-long", NULL );
      printf( "00-database-long\n"
              "  U.S. Gazetteer (1990)\n" );
      entry( "00-database-info", NULL );
      time(&t);
      printf( "00-database-info\n"
              "  This file was converted from the original database on:\n"
              "    %25.25s\n\n"
              "  The original data is available from:\n"
              "    ftp://ftp.census.gov:/pub/tiger/tms/gazetteer/places.zip\n"
              "    ftp://ftp.census.gov:/pub/tiger/tms/gazetteer/mcds.zip\n"
              "    ftp://ftp.census.gov:/pub/tiger/tms/gazetteer/counties.zip\n"
              "    ftp://ftp.census.gov:/pub/tiger/tms/gazetteer/zips.zip\n\n"
              "  The original U.S. Gazetteer Place and Zipcode Files\n"
              "  are provided by the U.S. Census Bureau and are in\n"
              "  the Public Domain.\n", ctime(&t) );
   }

   while (fgets(buffer,sizeof buffer,stdin)) {
      switch (type) {
      case MCDS:     places(buffer,'M');   break;
      case COUNTIES: places(buffer,'C');   break;
      case PLACES:   places(buffer,'P');   break;
      case ZIPS:     zips(buffer);         break;
      case FINAL:
         final(buffer);
         break;
      default:
         fprintf( stderr, "Illegal type %d\n", type );
         exit(2);
      }
   }

   if (type == FINAL) {
      int status;

      /* Flush the last pending headword, then wait for sort to finish. */
      entry(NULL,NULL);
      status = pclose(indexFile);
      fprintf( stderr, "%12d headwords\n", hwtotal );
      if (status != 0) {
         fprintf( stderr, "Writing %s failed\n", filename );
         return 4;
      }
   }

   return 0;
}
