/*

  Copyright (C) 2000, The MITRE Corporation

  Use of this software is subject to the terms of the GNU General
  Public License version 2.

  Please read the file LICENSE for the exact terms.

*/

/*
 *
 * Author: Mike Butler, mgb@mitre.org
 *
 * $Id: UtString.C,v 1.13 1999/12/08 19:21:00 mgb Exp $
 */
#include <UtString.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>		// for sprintf()
#include <string.h>		// for strerror()

// Stream insertion: emit exactly cLen characters of the string, one at a
// time.  No terminator is written.
ostream &operator<<(ostream &os, const String &s)
{
  size_t idx = 0;
  while(idx < s.cLen) {
    os << s.cBuf[idx];
    ++idx;
  }
  return os;
}
/* Read from stream - try to mimic (is >> (char*)).
 * Any previous contents of s are discarded.  Leading white space is
 * skipped; characters are then collected until the next white space
 * character, a NUL byte, a failed stream, or end of input. */
istream &operator>>(istream &is, String &s)
{
  s = "";
  char ch;
  for(;;) {
    if(!is) break;
    ch = is.get();
    if(!ch) break;              // a NUL byte ends the read
    if(is.eof()) break;

    const bool blank = (ch == '\t') || (ch == '\n') || (ch == '\f') ||
                       (ch == '\r') || (ch == ' ');
    if(!blank) {
      s.append(&ch, 0, 1);
      continue;
    }
    // White space ends the token once something has been collected,
    // otherwise it is leading white space and is skipped.
    if(s.length()) return is;
  }
  return is;
}
// Free-function wrapper around String::GetLine(): forwards the stream and
// terminator to s and returns the stream so calls can be chained.
istream &getline(istream &is, String &s, char term)
{
  s.GetLine(is, term);
  return is;
}

// Copy at most len characters of the string into the caller's buffer and
// NUL-terminate it.  The caller must provide room for the copied
// characters plus the terminator (nothing is checked here!).
// Returns the number of characters copied, not counting the terminator.
size_t String::Export(char *to, size_t len) const
{
  const size_t count = (len > cLen) ? cLen : len;
  const char *src = cBuf;
  for(size_t i = 0; i < count; i++) to[i] = src[i];
  to[count] = 0;		// Null terminate...
  return count;
}

// Can we NIX this method?  (No, used internally)
// Copies len bytes from `from` to `to`, then writes a terminating NUL
// right after the copied bytes (skipped only when the resulting
// destination pointer is null).  Returns len.
size_t String::Copy(const char *from, char *to, int len)
{
  for(int remaining = len; remaining != 0; --remaining) {
    *to = *from;
    ++to;
    ++from;
  }
  if(to) *to = 0;
  return len;
}

// Can we NIX this method?
// Null-safe string length: counts the characters before the first NUL,
// and returns 0 for a null pointer.
size_t String::Strlen(const char *s)
{
  if(s == 0) return 0;
  int count = 0;
  for(const char *p = s; *p; ++p) ++count;
  return count;
}
// Append characters read from the stream to this string.  Reading stops
// when the terminator t has been read (it is consumed but not stored),
// when the stream is no longer good, or when end of input is reached.
void String::GetLine(istream &is, char t)
{
  char ch;
  for(;;) {
    if(!is) break;
    ch = is.get();
    if(ch == t) break;
    if(is.eof()) break;
    append(&ch, 0, 1);
  }
}
// Case sensitive and insensitive compare of two strings...
// s1/s2 are the character buffers, l1/l2 their lengths; with foldcase set,
// letters are compared after mapping them to lower case.
// Returns:
//   -1 --> s1 <  s2
//    0 --> s1 == s2
//    1 --> s1 >  s2
// A longer string is greater than a shorter one if they're otherwise equal.
// A null buffer is treated as an empty string (that is how an empty String
// is stored), so a null buffer compares equal to "".
int String::Compare(const char *s1, const char *s2, size_t l1, size_t l2, bool foldcase = false) const
{
  /* handle degenerate cases... */
  if(!s1) l1 = 0;
  if(!s2) l2 = 0;
  if((s1 == s2) && (l1 == l2)) return(0);

  /* Pre: A, B point to ASCII strings. */

  /* Case folding table, built on first use. */
  static char map[256], init=0;
  if(!init) {
    for(int i = 0; i < 256; i++) {
      map[i] = isalpha(i) ? tolower(i) : i;
    }
    init++;
  }

  size_t l = (l1 < l2) ? l1 : l2;

  /* Do compare...  The table is indexed through unsigned char so bytes
   * above 0x7f cannot produce a negative index. */
  if(l) {
    ++l; --s1; --s2;
    if(foldcase)
      while(--l && (map[(unsigned char)*(++s1)] == map[(unsigned char)*(++s2)])) ;
    else
      while(--l && (*(++s1) == *(++s2))) ;
  }

  /* Here's a simpler but 20% slower version of compare... */
  // while(l && (map[*s1] == map[*s2])) { ++s1; ++s2; --l; }

  /* l is non-zero only if the loop stopped on a differing character. */
  if(l) {
    if(foldcase)
      return((map[(unsigned char)*s1] < map[(unsigned char)*s2]) ? -1 : 1);
    else
      return((*s1 < *s2) ? -1 : 1);
  }

  /* Common prefix is identical: the shorter string sorts first. */
  if(l1 == l2) return(0);
  return((l1 < l2) ? -1 : 1);
}

/* Replace the buffer with a larger one able to hold at least n
 * characters.  The capacity is rounded up to the next multiple of 16
 * above n and the first cLen characters are carried over.
 * Always leaves room for a null terminator! */
bool String::Realloc(size_t n)
{
  assert(n >= cLen);
  const size_t capacity = (n + 0x10) & ~0xf; /* Allocate in 16-byte chunks */
  char *fresh = new char[capacity + 1];      /* +1 for the terminator */
  Copy(cBuf, fresh, cLen);
  if(cBuf) delete [] cBuf;
  cBuf = fresh;
  cMax = capacity;
  return true;
}

// Assignment: drop the current buffer and append a copy of o.
// Assigning a string to itself is a no-op.
String &String::operator=(const String &o)
{
  if(this != &o) {
    if(cBuf) delete [] cBuf;
    cBuf = 0;
    cLen = 0;
    cMax = 0;
    append(o);
  }
  return *this;
}

// Put the object into the empty state: no buffer, zero length, zero capacity.
void String::Init()
{
  cBuf = 0;
  cLen = 0;
  cMax = 0;
}

// Build a string from n characters of s starting at offset start.
// A null s yields an empty string.  When n is npos the rest of the
// NUL-terminated text after start is taken; a start at or beyond the end
// of that text yields an empty string instead of a wrapped-around length.
String::String(const char *s, size_t start, size_t n)
{
  Init();
  if(!s) return;
  if(n == npos) {
    const size_t total = Strlen(s);
    n = (start < total) ? (total - start) : 0;
  }
  Realloc(n);
  // Only form s + start when something will actually be read from it.
  cLen = Copy(n ? (s + start) : s, cBuf, n);
}

// Build a string consisting of count copies of the character c.
String::String(char c, size_t count)
{
  Init();
  Realloc(count);
  cLen = count;
  cBuf[count] = 0;		// Realloc() leaves room for the terminator
  while(count--) cBuf[count] = c;
}

// Build a string holding the int v formatted with the format f.
String::String(int v, const String &f="%d")
{
  Init();
  operator=(Convert(v, f));
}

// Build a string holding the size_t v formatted with the format f.
// The value is converted to long before formatting.
// NOTE(review): the default format here is "%d" while the value handed to
// Convert() is a long - confirm this is intended on platforms where
// int and long differ in size.
String::String(size_t v, const String &f="%d")
{
  Init();
  const long asLong = (long)v;
  operator=(Convert(asLong, f));
}

// Build a string holding the double v formatted with the format f.
String::String(double v, const String &f="%f")
{
  Init();
  operator=(Convert(v, f));
}

// Build a string holding the long v formatted with the format f.
String::String(long v, const String &f="%ld")
{
  Init();
  operator=(Convert(v, f));
}

// Default constructor: an empty string with no buffer allocated.
String::String() { Init(); }

// Build a string from (part of) another one: start empty, then append
// n characters of o beginning at offset start.
String::String(const String&o, size_t start, size_t n)
{
  Init();
  (void)append(o, start, n);
}
// Destructor: release the buffer and reset all members.
String::~String()
{
  if(cBuf) delete [] cBuf;
  cBuf = 0;
  cMax = 0;
  cLen = 0;
}

// Copy up to n characters, starting at offset pos, into the caller's
// buffer cb.  No terminator is written.  The request is clamped to the
// characters actually stored, so a pos at or past the end (or an n that
// reaches past the end, including npos) never reads outside the string.
// Returns the number of characters copied.
size_t String::copy(char *cb, size_t n, size_t pos) const
{
  if((!cb) || (pos >= cLen)) return(0);
  const size_t avail = cLen - pos;
  if(n > avail) n = avail;	// also covers n == npos
  if(!n) return(0);
  memcpy(cb, cBuf+pos, n);
  return(n);
}

// Append n characters of s, starting at offset start.  A start beyond the
// end of s appends nothing; n is clamped to what s holds after start.
String &String::append(const String &s, size_t start, size_t n)
{
  if(start <= s.cLen) {
    const size_t avail = s.cLen - start;
    if(n > avail) n = avail;
    append(s.cBuf, start, n);
  }
  return *this;
}
// Append n characters of the buffer s, starting at offset start.
// With n == npos the rest of the NUL-terminated text after start is
// appended (nothing if start is at or past its end).  A null s appends
// nothing.  The source may point into this string's own buffer.
String &String::append(const char *s, size_t start, size_t n)
{
  if(n == npos) {
    const size_t total = Strlen(s);
    n = (start < total) ? (total - start) : 0;
  }
  if(!s) n = 0;
  const char *src = (s && n) ? (s + start) : s;

  if((cLen + n) > cMax) {
    // Realloc() frees the old buffer.  If the source lives inside it
    // (e.g. s += s), take a private copy first so we never read freed
    // memory.
    if(cBuf && src && (src >= cBuf) && (src <= cBuf + cMax)) {
      String tmp(src, 0, n);
      return(append(tmp.cBuf, 0, n));
    }
    Realloc(cLen + n);
  }
  cLen += Copy(src, cBuf + cLen, n);
  return(*this);
}

// Concatenation with a C string: returns a new string, *this is untouched.
String String::operator+(const char *o) const
{
  String sum(*this);
  sum += o;
  return sum;
}
// Concatenation with another String: returns a new string, *this is
// untouched.
String String::operator+(const String&o) const
{
  String sum(*this);
  sum += o;
  return sum;
}
// Concatenation with an integer: the value is turned into text with
// Convert() and appended to a copy of this string.
String String::operator+(int i) const
{
  String sum(*this);
  sum += String::Convert(i);
  return sum;
}
// Append another String in place.
String &String::operator+=(const String&o)
{
  return append(o);
}
// Append a C string in place.
String &String::operator+=(const char *o)
{
  return append(o);
}

// Append a single character in place.
String &String::operator+=(const char o)
{
  return append(&o, 0, 1);
}

// Return a new string made of n characters of this one starting at pos.
String String::substr(size_t pos, size_t n) const
{
  String piece(*this, pos, n);
  return piece;
}

// Mutable element access.  The string grows as needed: indexing at or
// beyond the current length extends the length to pos + 1 (the bytes in
// between are not initialised).
char &String::operator[](size_t pos)
{
  // Grow when pos == cMax as well: the length becomes pos + 1 and must
  // stay <= cMax so the terminator slot at cBuf[cLen] remains in bounds.
  if((!cBuf) || (pos >= cMax)) Realloc(pos);
  if(pos >= cLen) cLen = pos + 1;
  return(cBuf[pos]);
}

// Read-only element access.  Returns 0 for an empty string or for any
// position at or past the end, rather than whatever happens to sit in the
// unused part of the buffer.
char String::operator[](size_t pos) const
{
  if((!cBuf) || (pos >= cLen)) return(0);
  return(cBuf[pos]);
}

// Equality with a C string (case sensitive): non-zero when equal.
int String::operator==(const char *s) const
{
  const int order = Compare(cBuf, s, cLen, Strlen(s));
  return order == 0;
}

// Equality with another String (case sensitive): non-zero when equal.
int String::operator==(const String&o) const
{
  const int order = Compare(cBuf, o.cBuf, cLen, o.cLen);
  return order == 0;
}

// Ordering against a C string: non-zero when this string sorts first.
int String::operator<(const char *s) const
{
  const int order = Compare(cBuf, s, cLen, Strlen(s));
  return order < 0;
}

// Ordering against another String: non-zero when this string sorts first.
int String::operator<(const String&o) const
{
  const int order = Compare(cBuf, o.cBuf, cLen, o.cLen);
  return order < 0;
}

// Number of characters currently stored in the string.
size_t String::length() const
{
  return cLen;
}

// Find the first occurrence of p at or after position base.
// Returns the position of the match, or npos when there is none.
// An empty p matches at base as long as base is not past the end.
size_t String::find(const String &p, size_t base) const
{
  if(base > cLen) return npos;
  const size_t want = p.cLen;
  // Try every start position that still leaves room for the pattern.
  for(size_t start = base; want <= cLen - start; ++start) {
    size_t matched = 0;
    while((matched < want) && (cBuf[start + matched] == p.cBuf[matched]))
      ++matched;
    if(matched == want) return start;
    if(start == cLen) break;
  }
  return npos;
}

/// Find first character in string which is not a character in s.
size_t String::find_first_of(const String &s, size_t base) const
{
  for(size_t i = base; i < cLen; i++)
    for(size_t j = 0; j < s.cLen; j++)
      if(cBuf[i] == s.cBuf[j]) return(i);

  return(npos);
}

/// Find the first character, at or after base, which is not one of the
/// characters in s.  Returns its position, or npos if there is none.
/// With an empty s every character qualifies, so base itself is returned
/// when it is inside the string.
size_t String::find_first_not_of(const String &s, size_t base) const
{
  for(size_t i = base; i < cLen; i++) {
    // Scan the set; j reaches s.cLen only if cBuf[i] matched none of it.
    size_t j = 0;
    while((j < s.cLen) && (cBuf[i] != s.cBuf[j])) j++;
    if(j == s.cLen) return(i);
  }
  return(npos);
}

// Pointer to the character buffer, or to a static empty string when no
// buffer has been allocated (never returns null).
const char *String::c_str() const
{
  if(cBuf) return cBuf;
  return "";
}

// Ugh, breaks encapsulation, but very useful!
// Hands out the internal buffer pointer itself (null if nothing has been
// allocated yet).
char *String::GetRawBuf()
{
  char *raw = cBuf;
  return raw;
}

/* Force realloc to specified size... */
void String::reserve (size_t siz)
{
  size_t tmpLen = cLen;
  operator[](siz);
  cLen = tmpLen;
}

// Read len bytes starting at offset and assemble them into a long, most
// significant byte first (the inverse of Pack()).  With extend set, the
// result is sign-extended from the top bit of the first byte.
// A len of zero returns 0.
long String::Unpack(size_t len, size_t offset, bool extend)
{
  if(!len) return(0);
  /* Preconditions...... */
  assert(len <= sizeof(long));
  assert((len + offset) <= cLen);

  // Work on unsigned bytes and an unsigned accumulator: plain char may be
  // signed, which would sign-extend every byte >= 0x80 into the result.
  const unsigned char *c = (const unsigned char *)&cBuf[offset];
  unsigned long result = (extend && ((*c) & 0x80)) ? ~0UL : 0UL;

  while(len--)
    result = (result << 8) | *(c++);

  return((long)result);
}

// Convenience wrapper: start from an empty string, Pack() the value into
// it and return the result.
String String::MakeAndPack(long data, size_t len, size_t offset)
{
  String packed;
  packed.Pack(data, len, offset);
  return packed;
}

// Store the low len bytes of data into the string at offset, most
// significant byte first.  The string grows if the bytes do not fit.
void String::Pack(long data, size_t len, size_t offset)
{
  /* By default, pack at end of string */
  if(offset == npos) offset = cLen;
  /* Never try to pack more bytes than a long holds... */
  assert(len <= sizeof(long));

  const size_t last = offset + len;
  if(last > cMax) Realloc(last);
  if(last > cLen) cLen = last;

  /* Fill from the last byte backwards, low-order byte first. */
  for(size_t i = last; i > offset; i--) {
    cBuf[i - 1] = data;
    data >>= 8;
  }
}

/* Replaces the string with the next line from the stream: the old
 * contents are dropped, then characters are collected until the
 * terminator is read (it is not stored).
 * Returns false when eof hit... */
bool String::Getline(istream &is, char term = '\n') 
{ 
  cLen = 0;			// Lose original string
  if(cBuf) cBuf[cLen] = 0;	// Force null (if empty string)
  char ch;
  for(;;) {
    if(!is) break;
    ch = is.get();
    if(ch == term) break;
    if(is.eof()) break;
    append(&ch, 0, 1);
  }
  return !is.eof();
}

/* Load an entire file into a string, replacing the current contents.
 * Returns true if loaded, false otherwise.
 * On false return, string contains text of error.
 * The file descriptor is closed on every path.
 */
bool String::LoadFile(const String &path)
{
  int ifd = open(path.c_str(), O_RDONLY);
  if(ifd < 0) {
    *this = strerror(errno);
    return(false);
  }

  struct stat fs;
  if(fstat(ifd, &fs) < 0) {
    int err = errno;		// close() may overwrite errno
    close(ifd);
    *this = strerror(err);
    return(false);
  }

  /* Read file in one big chunk...
   * Drop the old contents first: Realloc() requires n >= cLen and would
   * otherwise carry the old text over. */
  cLen = 0;
  size_t need = (fs.st_size > 0) ? (size_t)fs.st_size : 0, have = 0;
  Realloc(need);

  while(have < need) {
    /* read() returns a signed count; only ask for what is still missing */
    ssize_t got = read(ifd, &cBuf[have], need - have);
    if(got < 0) {
      if(errno == EINTR) continue;	// interrupted, just retry
      int err = errno;
      close(ifd);
      *this = strerror(err);
      return(false);
    }
    if(got == 0) {
      /* File is shorter than fstat() reported; errno is not meaningful */
      close(ifd);
      *this = "unexpected end of file";
      return(false);
    }
    have += (size_t)got;
  }

  close(ifd);
  cLen = have;
  cBuf[cLen] = 0;		// Realloc() left room for the terminator
  return(true);
}
 // Initialize the Base64 encode/decode tables...
// encode table must be 64 bytes, decode is 256...
// Either pointer may be null.  The encode table maps a 6-bit value to
// its character.  The decode table receives value+1 for each alphabet
// character and 0x80 for the pad character '='; other entries are left
// untouched.
void String::Base64Init(unsigned char *encode, unsigned char *decode)
const {
  unsigned char scratch[256];
  // The alphabet is needed to build the decode table even when the
  // caller did not ask for it.
  unsigned char *alphabet = encode ? encode : scratch;

  int slot = 0;
  int k;
  for(k = 0; k < 26; k++) alphabet[slot++] = 'A' + k;
  for(k = 0; k < 26; k++) alphabet[slot++] = 'a' + k;
  for(k = 0; k < 10; k++) alphabet[slot++] = '0' + k;
  alphabet[slot++] = '+';
  alphabet[slot++] = '/';

  if(decode == 0) return;
  for(k = 0; k < 64; k++) decode[alphabet[k]] = k + 1;
  decode['='] = 0x80;
}
// Base64Encode() - Encode a string in base64
// Every three input bytes become four output characters; a short final
// group is padded with '='.  A newline is inserted whenever the current
// output line has grown beyond 64 characters.
// An empty string encodes to an empty string.
String String::Base64Encode() const {
  static unsigned char sEncode[64] = { 0, };

  // Tables initialized?  (entry 0 holds 'A' once the table is built;
  // the table only has 64 entries, so probe inside it)
  if(!sEncode[0]) Base64Init(sEncode, 0);

  if(!cLen) return("");

  // Read the input as unsigned bytes so values >= 0x80 are not
  // sign-extended into the 24-bit group.
  const unsigned char *s = (const unsigned char *)cBuf, *e = s + cLen;
  const size_t len = 2 * cLen + 4;	// generous upper bound on output
  char *result = new char[len], *d = result, *line = result;
  unsigned long buf;
  int i, pad;

  while(s < e) {
    buf = 0;
    pad = 0;
    // Nab 24 bits of data...
    for(i = 0; i < 3; i++) {
      buf <<= 8;
      if(s < e) buf += *s++;
      else ++pad;
    }

    // Spit it back out as four characters...
    for(i = 3; i >= 0; i--)
      if(i < pad) *d++ = '=';
      else *d++ = sEncode[(buf >> (6*i)) & 0x3f];

    // Maybe break line...  (once it is longer than 64 characters)
    if((d - line) > 64) {
      *d++ = '\n';
      line = d;
    }
  }
  String encoded(result, 0, d-result);
  delete [] result;
  return(encoded);
}

// Base64Decode() - Convert a base64 string back to normal...
// Characters outside the Base64 alphabet are skipped.  With strict set,
// the input is first checked with Base64Validate() and an empty string
// is returned if it does not pass.
String String::Base64Decode(bool strict) const {
  static unsigned char sDecode[256] = { 0, };

  // Tables initialized?
  if(!sDecode['a']) Base64Init(0, sDecode);

  if(!cLen) return("");

  if(strict && !Base64Validate()) return("");

  // Index the table with unsigned bytes: a plain (signed) char >= 0x80
  // would give a negative index.
  const unsigned char *s = (const unsigned char *)cBuf, *e = s + cLen;
  char *result = new char[cLen + 1], *d = result;
  int v, n = 0, pad = 0;
  unsigned long buf = 0;

  for(; s < e; s++) {
    if(!(v = sDecode[*s])) continue;	// not a Base64 character, skip
    ++n;
    buf = (buf << 6);
    if(v & 0x80) ++pad;
    else buf += v-1;
    // Seen a multiple of four characters?  Cough out bytes...
    if(n == 4) {
      if(--n > pad) *d++ = buf >> 16;
      if(--n > pad) *d++ = buf >> 8;
      if(--n > pad) *d++ = buf;
      n = 0;
      buf = 0;			// start the next 24-bit group clean
    }
  }

  // Decode done, return string...
  String decoded(result, 0, d-result);
  delete [] result;
  return(decoded);
}
// Base64Validate() - This a reasonable looking Base64 string?
// Accepts Base64 alphabet characters, at most two '=' pad characters
// (with no alphabet character after a pad), and white space / NUL bytes
// in between.  The number of alphabet plus pad characters must be a
// multiple of four.
bool String::Base64Validate() const {
  static unsigned char sDecode[256] = { 0, };

  // Tables initialized?
  if(!sDecode['a']) Base64Init(0, sDecode);

  const unsigned char *cur = (unsigned char *)cBuf;
  const unsigned char *end = (unsigned char *)cBuf + cLen;
  int padSeen = 0, significant = 0;

  while(cur < end) {
    const unsigned char ch = *cur++;
    const unsigned char code = sDecode[ch];

    if(code == 0x80) {
      // Should never have more than two pad characters...
      ++significant;
      if(++padSeen > 2) return false;
    } else if(code == 0x00) {
      // Non Base64 character, better be white space!
      const bool white = (ch == ' ') || (ch == '\t') || (ch == '\n') ||
                         (ch == '\r') || (ch == 0x00);
      if(!white) return false;	// Not white, danger!
    } else {
      ++significant;
      if(padSeen) return false;	// Embedded pad characters?
    }
  }

  // Good so far, count of significant characters a multiple of four?
  // If not, this is a bad string...
  return (significant & 0x3) == 0;
}
// HexEncode() - Encode a binary string into hex
// Each byte becomes two upper-case hex digits.  With white set, a newline
// is inserted whenever the current output line has grown beyond 64
// characters, to make it readable...
// An empty string encodes to an empty string.
String String::HexEncode(bool white) const {
  static char sEncode[16] = { 0, };

  // Init table if we haven't yet...
  if(!sEncode[0])
    for(int i = 0; i < 16; i++)
      sEncode[i] = (i < 10) ? (i + '0') : (i - 10 + 'A');

  if(!cLen) return("");

  const unsigned char *s = (const unsigned char *)cBuf, *e = s + cLen;
  // Heap buffer rather than a variable-length array: 2 digits per byte
  // plus the occasional newline always fits in 3*cLen + 4.
  const size_t len = 3 * cLen + 4;
  char *result = new char[len], *d = result, *line = result;

  while(s < e) {
    *d++ = sEncode[0xf & ((*s) >> 4)];
    *d++ = sEncode[0xf & *s];
    s++;
    if(white) {
      if((d - line) > 64) {
        *d++ = '\n';
        line = d;
      }
    }
  }
  String encoded(result, 0, d-result);
  delete [] result;
  return(encoded);
}
// HexDecode() - Convert a hex string back to binary...
//   We presume that there are an even number of hex digits...
//   (It's hard to avoid this since the string may have embedded white space)
//   An odd trailing digit becomes the high nibble of a final byte.
//   White space and the separators . | , : ; are skipped.  Any other
//   non-hex character is skipped too, unless strict is set, in which case
//   an empty string is returned.
String String::HexDecode(bool strict) const {
  // One entry per possible byte value: 0 = invalid, 0x80 = separator,
  // otherwise digit value + 1.
  static unsigned char sDecode[256] = { 0, };

  // Initialize encode/decode tables on first use...
  if(!sDecode['1']) {
    static const char white[] = "\f\v\n\t\r.|,:;, ";
    size_t i;
    for(i = 0; i < 10; i++)  sDecode['0' + i] = i + 1;
    for(i = 0; i < 6; i++) sDecode['a' + i] = sDecode['A' + i] = i+11;
    for(i = 0; white[i]; i++) sDecode[(unsigned char)white[i]] = 0x80;
  }

  // Unsigned bytes keep the table index in 0..255; the output buffer lives
  // on the heap and always has at least one byte.
  const unsigned char *s = (const unsigned char *)cBuf, *e = s + cLen;
  char *result = new char[cLen + 1], *d = result;
  int odd = 0;
  *d = 0;

  // Walk over string nibble at a time...
  while(s < e) {
    switch(sDecode[*s]) {
    case 0x80:
      // White space, no problem...
      break;
    case 0x00:
      // Invalid character...
      if(strict) {
        delete [] result;
        return("");
      }
      break;
    default:
      if(odd++ & 1)
        *d++ |= (sDecode[*s] - 1) & 0xf;	// low nibble completes the byte
      else
        *d = (sDecode[*s] - 1) << 4;	// high nibble starts a new byte
    }
    s++;
  }

  // Handle odd byte case (pad with zero nibble)
  if(odd & 1) d++;

  // Decode done, return string...
  String decoded(result, 0, d-result);
  delete [] result;
  return(decoded);
}

 // Take input (even if from stdin) and make it all one big string...
// Reads the stream in 100-byte chunks while it is good and appends
// whatever each read delivered.  Always returns true.
bool String::ReadToEof(istream &is) {
  char chunk[100];
  for(;;) {
    if(!is) break;
    is.read(chunk, 100);
    operator+=(String(chunk, 0, is.gcount()));
  }
  return true;
}
// Match() - determines if string matches a glob expression...
/* Expression matching glob style...
 * Matches only * and ? in regexp, does not handle quoting.
 * '*' matches any run of characters (including none), '?' matches any
 * single character.  With fold set, letters match regardless of case.
 * Returns true only if the whole string matches the whole pattern.
 */
bool String::Match(const String &pat, bool fold = false) const {
  // i: position in this string, j: position in the pattern.
  // w: pattern position just after the most recent '*' (0 = none seen),
  // x: string position where that '*' currently stops matching.
  size_t i = 0, j = 0, x = 0, w = 0;
  while(i < cLen) {
    if(j < pat.cLen) {
      if(pat.cBuf[j] == '*')     { j++; w = j; x = i; continue; }
      if(pat.cBuf[j] == '?')     { i++; j++; continue; }
      if(pat.cBuf[j] == cBuf[i]) { i++; j++; continue; }
      // No match...  Maybe map case?
      // (ctype functions need an unsigned char value, never a negative char)
      if(fold &&
         isalpha((unsigned char)pat.cBuf[j]) &&
         isalpha((unsigned char)cBuf[i]) &&
         tolower((unsigned char)cBuf[i]) == tolower((unsigned char)pat.cBuf[j])) { i++; j++; continue; }
    }
    // Have wild card?  Back up try again...
    // (let the last '*' swallow one more character)
    if(w) { j = w; i = ++x; continue; }
    // Can't be matched!
    return(false);
  }
  // Gobble trailing stars...
  while((j < pat.cLen) && (pat.cBuf[j] == '*')) j++;
  return((i == cLen) && (j == pat.cLen));
}
// Chomper() - Chew trailing (or leading) characters off the string...
// Returns a copy of the string with every character found in white
// removed from the left end (left == true) or from the right end
// (left == false).  The string itself is not modified.
String String::Chomper(const String &white, bool left) const {
  char junk[256];
  size_t n;
  memset(junk, 0, sizeof(junk));
  // Make junk table to simplify searching...
  // Index with unsigned char (a negative char would index before the
  // table) and set a flag rather than count, so repeats cannot wrap to 0.
  for(n = 0; n < white.cLen; n++)
    junk[(unsigned char)white.cBuf[n]] = 1;

  if(left) {
    for(n = 0; n < cLen; n++) if(!junk[(unsigned char)cBuf[n]]) break;
    return(substr(n));
  } else {
    // Find the last character worth keeping...
    for(n = cLen; n; n--)
      if(!junk[(unsigned char)cBuf[n-1]]) break;
    return(substr(0, n));
  }
}
 // IsPrintable() - True if the string is all printable characters...
// A character counts as printable when isgraph() or isspace() accepts it.
// An empty string is printable.
bool String::IsPrintable() const {
  size_t i;
  for(i = 0; i < cLen; i++) {
    // ctype functions need an unsigned char value, never a negative char
    const unsigned char ch = (unsigned char)cBuf[i];
    if(!(isgraph(ch) || isspace(ch))) return(false);
  }
  return(true);
}

// ToUpper() - Convert string to uppercase characters...
// Returns a converted copy; the string itself is not modified.
String String::ToUpper() const {
  size_t i;
  // Heap buffer (never zero-sized) instead of a variable-length array.
  char *result = new char[cLen + 1];
  for(i = 0; i < cLen; i++) {
    // ctype functions need an unsigned char value, never a negative char
    const unsigned char ch = (unsigned char)cBuf[i];
    result[i] = islower(ch) ? toupper(ch) : cBuf[i];
  }
  String upper(result, 0, cLen);
  delete [] result;
  return(upper);
}

// ToLower() - Convert string to lower characters...
// Returns a converted copy; the string itself is not modified.
String String::ToLower() const {
  size_t i;
  // Heap buffer (never zero-sized) instead of a variable-length array.
  char *result = new char[cLen + 1];
  for(i = 0; i < cLen; i++) {
    // ctype functions need an unsigned char value, never a negative char
    const unsigned char ch = (unsigned char)cBuf[i];
    result[i] = isupper(ch) ? tolower(ch) : cBuf[i];
  }
  String lower(result, 0, cLen);
  delete [] result;
  return(lower);
}
// BreakLines() - Reformat string to have reasonable line breaks
// Len is the bogie for how long a line is, force allows words
// to be broken in the middle if they're longer than len.
// Note: runs of white space (tabs, multiple spaces, newlines, etc)
// are munged into a single space!
// Returns the reformatted text; the string itself is not modified.
String String::BreakLines(int len = 80, bool force = true) {
  String result;
  if(!cLen) return(result);

  const char *s = cBuf, *e = cBuf + cLen;
  // Work buffer for the line being assembled.  Each input character adds
  // at most itself plus one separating space, plus a terminator.
  char *line = new char[2*cLen + 2];
  // bol: start of the current line, d: next free slot,
  // eol: last good breaking point (== bol when there is none yet)
  char *eol = line, *bol = line, *d = line;
  int space = 0;

  // while there's input left  (compare positions, not characters!)
  while(s != e) {
    // This a space? drop it!
    if(isspace((unsigned char)*s)) {
      space++;
      s++;
      continue;
    }
    // Not a space, new word?  Start with a space...
    if(space) {
      eol = d;
      *d++ = ' ';
      space = 0;
    }
    // First character of new word...
    *d++ = *s++;
    *d = 0;			// for debug legibility only...
    // Line full?
    if((d - bol) >= len) {
      // Yes, have a good breaking point? (or a forced one?)
      if((eol == bol) && (force)) eol = d;
      if(eol != bol) {
        // Yes, add line (sans space) to output
        result += String(bol, 0, eol - bol);
        result += "\n";
        // Move remainder of line down...
        // eol points at a space here unless the break was forced; drop it.
        if(d == eol) {
          d = bol;		// forced break: nothing is left over
        } else {
          const size_t rest = d - eol - 1;
          memmove(bol, eol + 1, rest);	// regions may overlap
          d = bol + rest;
        }
        *d = 0;
        eol = bol;
      }
    }
  }
  // Out of data, line buffer still got partial line?
  if(d != bol)
    result += String(bol, 0, d - bol);

  delete [] line;
  return(result);
}

// Occurs() - Count how many times <c> occurs in the string,
// looking only at positions from base onwards.
size_t String::Occurs(const char c, size_t base = 0) const {
  size_t hits = 0;
  size_t pos = base;
  while(pos < cLen) {
    if(cBuf[pos] == c) ++hits;
    ++pos;
  }
  return hits;
}
// Convert values to and from a string...
// Format the long val with the printf-style format fmt (which must
// consume exactly one long argument) and return the resulting text.
// Returns an empty string if formatting fails.
String String::Convert(long val, const String &fmt) {
  // Measure first, then format into a buffer of exactly the right size,
  // so wide formats such as "%100ld" cannot overrun anything.
  int need = snprintf(0, 0, fmt.c_str(), val);
  if(need < 0) return("");
  char *tmp = new char[need + 1];
  snprintf(tmp, need + 1, fmt.c_str(), val);
  String text(tmp);
  delete [] tmp;
  return(text);
}
// Format the double val with the printf-style format fmt (which must
// consume exactly one double argument) and return the resulting text.
// Returns an empty string if formatting fails.
String String::Convert(double val, const String &fmt) {
  // Measure first, then format into a buffer of exactly the right size:
  // "%f" of a large value can need several hundred characters.
  int need = snprintf(0, 0, fmt.c_str(), val);
  if(need < 0) return("");
  char *tmp = new char[need + 1];
  snprintf(tmp, need + 1, fmt.c_str(), val);
  String text(tmp);
  delete [] tmp;
  return(text);
}
// Format the int val with the printf-style format fmt (which must
// consume exactly one int argument, e.g. "%d") and return the text.
// The value is passed to the formatter as an int, matching int formats.
// Returns an empty string if formatting fails.
String String::Convert(int val, const String &fmt) {
  int need = snprintf(0, 0, fmt.c_str(), val);
  if(need < 0) return("");
  char *tmp = new char[need + 1];
  snprintf(tmp, need + 1, fmt.c_str(), val);
  String text(tmp);
  delete [] tmp;
  return(text);
}

// These beg for a template function, by format string hoses that...
// Convert string to double...
double String::Convert(double *val, bool *ok, const String &fmt) const {
  if(ok) *ok = false;
  double v, *pV = (val) ? val : &v;
  if(!cBuf) return(0);
  cBuf[cLen] = 0;		// Insure null byte...
  if(1 == sscanf(cBuf, fmt.cBuf, pV)) 
    if(ok) *ok = true;
  return((val) ? *val : v);
}
// String to long...
long String::Convert(long *val, bool *ok, const String &fmt) const {
  if(ok) *ok = false;
  long v, *pV = (val) ? val : &v;
  if(!cBuf) return(0);
  cBuf[cLen] = 0;		// Insure null byte...
  if(1 == sscanf(cBuf, fmt.cBuf, pV)) 
    if(ok) *ok = true;
  return((val) ? *val : v);
}
// String to int...
int String::Convert(int *val, bool *ok, const String &fmt) const {
  if(ok) *ok = false;
  int v, *pV = (val) ? val : &v;
  if(!cBuf) return(0);
  cBuf[cLen] = 0;		// Insure null byte...
  if(1 == sscanf(cBuf, fmt.cBuf, pV)) 
    if(ok) *ok = true;
  return((val) ? *val : v);
}

// Replace one string with another one or more times...
// Returns a copy of this string in which up to count occurrences of s,
// taken left to right, are replaced by r.  The string itself is not
// modified.  An empty s matches nothing, so the copy is returned as is.
String String::Replace(const String &s, const String &r, size_t count) const
{
  String result;
  size_t base = 0;
  // An empty pattern "matches" at every position without ever advancing,
  // which would append r count times; skip the search in that case.
  if(s.length()) {
    while(count--) {
      size_t begin = find(s, base);
      if(begin == npos) break;
      result += substr(base, begin - base) + r;
      base = begin + s.length();
    }
  }
  result += substr(base, npos);
  return(result);
}

// Starting at base, find a string of this form
//    [$eow]*([~$eow]+)([$eow]|$)
// That is, skip anything in eow, then match anything not in eow
// that is terminated with either end of string or something in EOW.
// Returns the matched token; when rest is non-null it receives the
// remainder of the string starting at the terminating character.
String String::Token(String *rest, size_t base, const String &eow) const
{
  const size_t first = find_first_not_of(eow, base);
  const size_t stop  = find_first_of(eow, first);

  size_t span = npos;
  if(stop != npos) span = stop - first;

  String token;
  token = substr(first, span);
  if(rest) *rest = substr(stop);
  return token;
}

// Split string on sequence, return portion before split.
// The search for p starts at base.  When rest is non-null it receives
// the text following the delimiter (empty if p was not found).
String String::Split(const String &p, String *rest, size_t base) const 
{
  const size_t hit = find(p, base);

  size_t headLen = npos;	// no delimiter: take everything from base
  size_t tailAt  = npos;	// where the piece after the delimiter starts
  if(hit != npos) {
    headLen = hit - base;
    tailAt  = hit + p.cLen;
  }

  String head;
  head = substr(base, headLen);
  if(rest) *rest = substr(tailAt);
  return head;
}

// IsTrue() - see if a string has a value suggesting "true"
// Only the first character is examined: T, t, Y, y or 1 mean true.
// An empty string is false.
bool String::IsTrue() {
  if(cLen == 0) return false;
  const char first = cBuf[0];
  return (first == 'T') || (first == 't') ||
         (first == 'Y') || (first == 'y') ||
         (first == '1');
}


