/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                      Copyright (c) 1995,1996                          */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute this software and its    */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                       Author :  Paul Taylor and Simon King            */
/*                       Date   :  June 1995                             */
/*-----------------------------------------------------------------------*/
/*                  Stream class auxiliary routines                      */
/*                                                                       */
/*=======================================================================*/
#include <stdlib.h>
#include <iostream.h>
#include <fstream.h>
#include <math.h>
#include "EST_types.h"
#include "ling_class/EST_Stream.h"
#include "ling_class/EST_stream_aux.h"
#include "EST_string_aux.h"
#include "EST_io_aux.h"
#include "EST_Option.h"
#include "EST_Token.h"

// now in hash_sstreamp_t.cc
//#if defined(INSTANTIATE_TEMPLATES)
//#include "../base_class/EST_THash.cc"
//template class EST_TStringHash<EST_Stream*>;
//template class EST_THash<EST_String,EST_Stream*>;
//template class EST_Hash_Pair<EST_String,EST_Stream*>;
//EST_Stream* EST_THash<EST_String,EST_Stream*>::Dummy_Value = NULL;
//#endif

static int is_in_class(const EST_String &name, EST_StrList &s);

// Snap the end time of every item in stream a onto a grid of step q.
// Each end time is divided by q, rounded to an integer value with
// rint() and multiplied back by q.  q is used as a divisor and is not
// checked against zero.
void quantize(EST_Stream &a, float q)
{
    EST_Stream_Item *item = a.head();

    while (item != 0)
    {
	float steps = item->end() / q;	// end time in units of q
	steps = rint(steps);
	float snapped = steps * q;
	item->set_end(snapped);
	item = next(item);
    }
}

// Edit the label names of stream a with a sed script.
//
// The names are written one per line to a temporary file, the shell
// pipeline "cat <tmp1> | sed -f <sedfile> > <tmp2>" is run, and the
// whitespace separated tokens of the result are read back and assigned
// to the items in order.
//
// Returns 0 on success and -1 if a temporary file cannot be opened, the
// shell command reports failure, or the sed output holds fewer tokens
// than the stream has items (items already renamed keep their new
// names).
//
// NOTE(review): the file names are pasted into the shell command
// unquoted, so sedfile must not contain shell metacharacters.
// NOTE(review): names are read back as whitespace separated tokens of
// at most 1023 characters; names containing whitespace will not round
// trip.

int edit_labels(EST_Stream &a, EST_String sedfile)
{
    EST_Stream_Item *a_ptr;
    char newname[1024];
    FILE *fp;
    EST_String file1, file2, command, label;
    int status = 0;

    file1 = make_tmp_filename();
    file2 = make_tmp_filename();

    fp = fopen(file1, "wb");
    if (fp == NULL)
    {
	fprintf(stderr,"edit_labels: cannot open \"%s\" for writing\n",
		(const char *)file1);
	return -1;
    }
    for (a_ptr = a.head(); a_ptr != 0; a_ptr = next(a_ptr))
    {
	label = a_ptr->name();
	fprintf(fp, "%s\n", (const char *)label);
    }
    fclose(fp);

    // Build the command in an EST_String so that long file names cannot
    // overrun a fixed size buffer.
    command = "cat ";
    command = command + file1;
    command = command + " | sed -f ";
    command = command + sedfile;
    command = command + " > ";
    command = command + file2;

    printf("command: %s\n", (const char *)command);
    if (system((const char *)command) != 0)
    {
	fprintf(stderr,"edit_labels: command \"%s\" failed\n",
		(const char *)command);
	remove(file1);
	remove(file2);
	return -1;
    }

    fp = fopen(file2, "rb");
    if (fp == NULL)
    {
	fprintf(stderr,"edit_labels: cannot open \"%s\" for reading\n",
		(const char *)file2);
	remove(file1);
	remove(file2);
	return -1;
    }
    for (a_ptr = a.head(); a_ptr != 0; a_ptr = next(a_ptr))
    {
	// The field width keeps fscanf inside newname; a failed read
	// means sed produced fewer names than there are items.
	if (fscanf(fp, "%1023s", newname) != 1)
	{
	    fprintf(stderr,"edit_labels: too few labels in \"%s\"\n",
		    (const char *)file2);
	    status = -1;
	    break;
	}
	a_ptr->set_name(newname);
    }
    fclose(fp);

    // The temporary files are only needed for the duration of the call.
    remove(file1);
    remove(file2);
    return status;
}

// make new EST_Stream from start and end points.
void extract(const EST_Stream &orig, float start, float end, EST_Stream &ex)
{
    EST_Stream_Item *a;
    EST_Stream_Item tmp;

    for (a = orig.head(); a != 0; a = next(a))
	if ((a->end() > start) && (a->start() < end))
	{
	    if ((a->end() > end))
	    {
		tmp = *a;
		tmp.set_end(end);
		ex.append(tmp);
	    }
	    else
		ex.append(*a);
	}

}

// Collapse runs of adjacent items that share a name: an item is removed
// whenever the item after it has the same name, so only the last item
// of each run is left in seg.  labtype is accepted but not used.
void merge_all_label(EST_Stream &seg, const EST_String &labtype)
{
    EST_Stream_Item *current, *following;
    (void)labtype;  // unused parameter

    current = seg.head();
    while (current != seg.tail())
    {
	// fetch the successor first; current may be removed below
	following = next(current);
	if (current->name() == following->name())
	    seg.remove(current);
	current = following;
    }
}

// Rename every item of seg whose name equals oname to nname.
void change_label(EST_Stream &seg, const EST_String &oname, 
		  const EST_String &nname)
{
    EST_Stream_Item *item = seg.head();

    while (item != 0)
    {
	if (item->name() == oname)
	    item->set_name(nname);
	item = next(item);
    }
}

// Rename to nname every item of seg whose name matches an entry of the
// list oname.  Every list entry is compared for every item; the scan
// does not stop at the first match.
void change_label(EST_Stream &seg, const EST_StrList &oname, 
		  const EST_String &nname)
{
    EST_Stream_Item *item;
    EST_Litem *entry;

    item = seg.head();
    while (item != 0)
    {
	entry = oname.head();
	while (entry)
	{
	    if (item->name() == oname(entry))
		item->set_name(nname);
	    entry = next(entry);
	}
	item = next(item);
    }
}

static int is_in_class(const EST_String &name, EST_StrList &s)
{
    EST_TBI *p;

    for (p = s.head(); p; p = next(p))
	if (name == s(p))
	    return TRUE;
    
    return FALSE;
}

// Check that the name of every item in stream a appears in vocab.
// On the first item that does not, a message naming the stream and the
// item is written to cerr and -1 is returned.  Returns 0 when every
// name is in the vocabulary.
int check_vocab(EST_Stream &a, EST_StrList &vocab)
{
    EST_Stream_Item *item = a.head();

    while (item)
    {
	if (!is_in_class(item->name(), vocab))
	{
	    cerr<<"Illegal entry in file " <<a.name()<< ":\""  << *item << "\"\n";
	    return -1;
	}
	item = next(item);
    }
    return 0;
}

// Mark the items of seg that belong to a broad class.
//
// The option named class_type + "_list" is looked up in options; its
// value is a whitespace separated list of segment names.  The list is
// tokenised and handed to convert_to_broad(), which sets a feature on
// every item according to whether its name is in the list.
void convert_to_broad_class(EST_Stream &seg, const EST_String &class_type, 
			   EST_Option &options)
{
    EST_String list_option = class_type + "_list";
    EST_String member_names(options.val(list_option, 1));
    EST_StrList members;
    EST_TokenStream tokens;

    tokens.open_string(member_names);
    for (; !tokens.eof(); )
	members.append(tokens.get().string());

    convert_to_broad(seg, members);
} 

// Set an integer feature on every item of seg saying whether the item's
// name is in pos_list.
//
// The feature is called broad_name, or "pos" when broad_name is empty.
// With a non-zero polarity, items in the list get 1 and all others 0;
// with a zero polarity the two values are swapped.
void convert_to_broad(EST_Stream &seg, EST_StrList &pos_list, 
		      EST_String broad_name, int polarity)
{
    EST_Stream_Item *item;
    const int member_value = (polarity) ? 1 : 0;
    const int other_value = (polarity) ? 0 : 1;

    if (broad_name == "")
	broad_name = "pos";

    item = seg.head();
    while (item != 0)
    {
	if (is_in_class(item->name(), pos_list))
	    item->f.set(broad_name, member_value);
	else
	    item->f.set(broad_name, other_value);
	item = next(item);
    }
} 

// Rename or delete the items of seg according to map.
//
// For every item whose name is a key of map: if the mapped value is
// "!DELETE" the item is removed from seg, otherwise the item is renamed
// to the mapped value.  Items whose names are not in map are left
// alone.
void label_map(EST_Stream &seg, EST_Option &map)
{
    EST_Stream_Item *a_ptr;
    
    for (a_ptr = seg.head(); a_ptr != 0; )
    {
	if (map.present(a_ptr->name()))
	{
	    if (map.val(a_ptr->name()) == "!DELETE")
	    {
		// NOTE(review): remove() is assumed to return the item
		// before the removed one, or 0 when the head was removed
		// -- confirm against EST_Stream.
		a_ptr = seg.remove(a_ptr);
		if (a_ptr == 0)
		{
		    // The head was removed: carry on from the new head.
		    // If that was also the last item the stream is now
		    // empty, head() is 0 and the loop ends, instead of
		    // next() being called on a null pointer.
		    a_ptr = seg.head();
		    continue;
		}
	    }
	    else
		a_ptr->set_name(map.val(a_ptr->name()));
	}
	a_ptr = next(a_ptr);
    }
} 

// Add shift to the end time of every item in seg.
void shift_label(EST_Stream &seg, float shift)
{
    EST_Stream_Item *item = seg.head();

    while (item != 0)
    {
	item->set_end(item->end() + shift);
	item = next(item);
    }
}

// Keep in mlf only the streams selected by filenames and remove all the
// others.
//
// With exact_match, a stream is kept when its name equals one of the
// given file names.  Otherwise each file name is first reduced with
// basename() and a stream is kept when its name contains one of the
// reduced names.
void StreamList_select(EST_StreamList &mlf, EST_StrList filenames, bool
			exact_match)
{
    EST_TBI *stream_ptr, *name_ptr;
    bool wanted;
    EST_StrList targets;	// the names actually compared against

    name_ptr = filenames.head();
    while (name_ptr != NULL)
    {
	if (exact_match)
	    targets.append( filenames(name_ptr) );
	else
	    targets.append( basename(filenames(name_ptr)) );
	name_ptr = next(name_ptr);
    }

    stream_ptr = mlf.head();
    while (stream_ptr != NULL)
    {
	wanted = false;
	for (name_ptr = targets.head(); name_ptr != NULL;
	     name_ptr = next(name_ptr))
	{
	    if (exact_match)
	    {
		if (targets(name_ptr) == mlf(stream_ptr).name())
		{
		    wanted = true;
		    break;
		}
	    }
	    else if (mlf(stream_ptr).name().contains(targets(name_ptr)))
	    {
		wanted = true;
		break;
	    }
	}

	if (wanted)
	{
	    stream_ptr = next(stream_ptr);
	    continue;
	}

	stream_ptr = mlf.remove(stream_ptr);
	if (stream_ptr == 0)		// must have removed head of list
	    stream_ptr = mlf.head();
	else
	    stream_ptr = next(stream_ptr);
    }
    targets.clear();
}

// Find the stream for "filename" in mlf and return a copy of it.
//
// With base set, the stream name and filename are both reduced with
// basename(..., "*") before being compared.  Otherwise the stream name
// reduced with basename() is compared with filename as given.  The
// first matching stream is returned.  When nothing matches, a message
// is written to cerr and a default constructed EST_Stream is returned.
EST_Stream StreamList_extract(EST_StreamList &mlf, const EST_String &filename, bool base)
{
    EST_TBI *entry = mlf.head();

    if (base)
    {
	while (entry)
	{
	    if (basename(mlf(entry).name(), "*")==basename(filename, "*"))
		return mlf(entry);
	    entry = next(entry);
	}
    }
    else
    {
	while (entry)
	{
	    if (basename(mlf(entry).name()) == filename)
		return mlf(entry);
	    entry = next(entry);
	}
    }

    cerr << "No match for file " << filename << " found in mlf\n";
    EST_Stream d;
    return d;
}

// Combine all the streams in mlf into a single stream.
//
// The items of each stream are appended in order.  Only the name and
// the end time of an item are carried over, and each end time is
// offset by the end time of the last item appended from the streams
// before it.  A line "appended t <item>" is written to cout for every
// item appended.
EST_Stream StreamList_combine(EST_StreamList &mlf)
{
    EST_Stream all;
    EST_Stream_Item shifted;	// reused for every appended item
    EST_Stream_Item *item;
    EST_TBI *entry;
    float offset = 0.0;

    entry = mlf.head();
    while (entry)
    {
	item = mlf(entry).head();
	while (item)
	{
	    shifted.set_name(item->name());
	    shifted.set_end(item->end() + offset);
	    all.append(shifted);
	    cout << "appended t " << shifted << endl;
	    item = next(item);
	}
	// the next stream starts where this one finished
	offset = shifted.end();
	entry = next(entry);
    }
    return all;
}

// Combine all the streams in mlf into a single stream, positioning each
// one with the matching item of key.
//
// The n-th stream of mlf is paired with the n-th item of key, and the
// start time of that key item is added to the end time of every item
// of the stream.  Only the name and end time of an item are carried
// over.  If key and mlf differ in length, a message is written to cerr
// and an empty stream is returned.
EST_Stream StreamList_combine(EST_StreamList &mlf, EST_Stream &key)
{
    EST_Stream all;
    EST_Stream_Item shifted;	// reused for every appended item
    EST_Stream_Item *item, *key_item;
    EST_TBI *entry;
    float offset;

    if (key.length() != mlf.length())
    {
	cerr << "StreamList has " << mlf.length() << " elements: expected "
	    << key.length() << " from key file\n";
	return all;
    }

    key_item = key.head();
    entry = mlf.head();
    while (entry)
    {
	offset = key_item->start();
	for (item = mlf(entry).head(); item; item = next(item))
	{
	    shifted.set_name(item->name());
	    shifted.set_end(item->end() + offset);
	    all.append(shifted);
	}
	entry = next(entry);
	key_item = next(key_item);
    }
    return all;
}

// Divide a single stream into multiple chunks according to the keylab
// stream.  If a keylab boundary falls in the middle of a label, the
// label is assigned to the chunk which has the most overlap with it.
// Labels named in the "blank" list are instead duplicated across the
// boundary.
//
// slist is cleared and then receives one stream per chunk.  Each chunk
// is named from the "file" feature of its keylab item followed by ext,
// and its end times are made relative to the chunk by subtracting the
// start time of the keylab item (0 for the first chunk).
//
// Returns 0 on success.  Returns -1, after writing a message to cerr,
// if either input stream is empty, if keylab ends before lab does, if
// no keylab item ends after the first label, or if the keylab items
// run out before the labels do.
int stream_divide(EST_StreamList &slist, EST_Stream &lab, EST_Stream &keylab,
		  EST_StrList &blank, EST_String ext)
{
    EST_Stream a;
    EST_Stream_Item *s, *k, t, *n;
    EST_String filename;
    float kstart;
    
    slist.clear();
    
    // head() and tail() are dereferenced below, so empty input has to
    // be rejected first.
    if ((lab.head() == 0) || (lab.tail() == 0) || (keylab.tail() == 0))
    {
	cerr << "stream_divide: label and key streams must not be empty\n";
	return -1;
    }
    
    if ((keylab.tail())->end() < (lab.tail())->end())
    {
	cerr << "Key file must extend beyond end of label file\n";
	return -1;
    }
    

    // find the first keylab that will make a non-empty file
    for (k = keylab.head(); k ; k = next(k))
	if (k->end() > lab.head()->end())
	    break;

    // The search fails when no key item ends strictly after the first
    // label, e.g. a single label ending exactly where the key ends.
    if (k == 0)
    {
	cerr << "stream_divide: no key entry ends after the first label\n";
	return -1;
    }

    filename = k->f("file");
    a.set_name(filename + ext);
    kstart = 0.0;
    
    for (s = lab.head(); s; s = next(s))
    {
	n = next(s);
	if (n == 0)
	{
	    // last label: it always goes into the current chunk
	    t = *s;
	    t.set_end(s->end() - kstart);
	    a.append(t);
	    break;
	}
	if (n->end() > k->end())
	{
	    // the next label crosses the end of the current chunk
	    if (((n->end() - k->end()) < (k->end() - n->start())) || 
		is_in_class(n->name(), blank))
	    {
		// most of n lies in this chunk, or n is a blank label:
		// add s, then n cut off at the chunk boundary
		t = *s;
		t.set_end(s->end() - kstart);
		a.append(t);
		t = *n;
		t.set_end(k->end() - kstart);
		a.append(t);
		// a non-blank n is used up here; a blank n is visited
		// again so that it also starts the next chunk
		if (!is_in_class(n->name(), blank))
		    s = next(s);
	    }
	    else
	    {
		// n belongs to the next chunk: stretch s to the boundary
		t = *s;
		t.set_end(k->end() - kstart);
		a.append(t);
	    }
	    
	    slist.append(a);
	    k = next(k);
	    if (k == 0)
	    {
		cerr << "stream_divide: key file ran out of entries before "
		    << "the end of the label file\n";
		return -1;
	    }
	    kstart = k->start();
	    a.clear();
	    filename = k->f("file");
	    a.set_name(filename + ext);
	}
	else
	{
	    t = *s;
	    t.set_end(s->end() - kstart);
	    a.append(t);
	}
    }
    slist.append(a);
    
    return 0;
}

// Divide stream lab into chunks at label boundaries, guided by keylab.
//
// Labels are copied (name and end time only) into the current chunk.
// As soon as a label ends after the current keylab item, the chunk is
// closed at that label: it is appended to mlf, the end time of the
// keylab item is moved to the end of that label, and a new chunk is
// started for the next keylab item.  Chunks are named from the keylab
// item's name followed by ext, and end times within a chunk are
// relative to the end of the label that closed the previous chunk.
// Every chunk appended is also written to cout.
//
// mlf is cleared first.  Returns 0 on success.  Returns -1, after
// writing a message to cerr, if either input stream is empty, if keylab
// ends before lab does, or if the keylab items run out before the
// labels do.
int stream_divide2(EST_StreamList &mlf, EST_Stream &lab, EST_Stream &keylab,
		   EST_String ext)
{
    EST_Stream a;
    EST_Stream_Item *s, *k, t;
    float kstart;
    
    mlf.clear();
    
    // tail() and head() are dereferenced below, so empty input has to
    // be rejected first.
    if ((lab.tail() == 0) || (keylab.head() == 0) || (keylab.tail() == 0))
    {
	cerr << "stream_divide2: label and key streams must not be empty\n";
	return -1;
    }
    
    if ((keylab.tail())->end() < (lab.tail())->end())
    {
	cerr << "Key file must extend beyond end of label file\n";
	return -1;
    }
    
    k = keylab.head();
    a.set_name(k->name() + ext);
    kstart = 0.0;
    
    for (s = lab.head(); s; s = next(s))
    {
	t.set_name(s->name());
	t.set_end(s->end() - kstart);
	a.append(t);
	
	if (s->end() > k->end())
	{
	    cout << "appending " << a;
	    mlf.append(a);
	    
	    // the chunk boundary is moved to the end of this label
	    kstart = s->end();
	    k->set_end(s->end());
	    k = next(k);
	    if (k == 0)
	    {
		cerr << "stream_divide2: key file ran out of entries before "
		    << "the end of the label file\n";
		return -1;
	    }
	    a.clear();
	    a.set_name(k->name() + ext);
	}
    }
    cout << "appending " << a;
    mlf.append(a);
    
    return 0;
}

// Find the stream for "filename" in mlf and return its list pointer.
//
// With base set, the stream name and filename are both reduced with
// basename(..., "*") before being compared; otherwise the stream name
// must equal filename.  The pointer of the first match is returned.
// When nothing matches, a message is written to cerr and 0 is returned.
EST_TBI *StreamList_ptr_extract(EST_StreamList &mlf, const EST_String &filename, bool base)
{
    EST_TBI *entry = mlf.head();

    if (base)
    {
	while (entry)
	{
	    if (basename(mlf(entry).name(), "*")==basename(filename, "*"))
		return entry;
	    entry = next(entry);
	}
    }
    else
    {
	while (entry)
	{
	    if (mlf(entry).name() == filename)
		return entry;
	    entry = next(entry);
	}
    }

    cerr << "No match for file " << filename << " found in mlf\n";
    return 0;
}

// Apply the conversions requested in option list al to stream lab.
//
// These are applied, in this order, whenever the option is present:
//   -shift   add the given amount to every end time
//   -extend  override "-length" with the -extend value times the end
//            time of the last item (skipped for an empty stream)
//   -q       quantize end times to multiples of the given value
//   -start   together with -end: keep only the part of the stream
//            between the two times
// After that at most one of the following is applied, the first one
// present in this order:
//   -class   broad class feature, class list looked up in op
//   -pos     broad class feature, class list taken from -lablist
//   -sed     edit names with the given sed script
//   -map     rename/delete items using the given map file; nothing is
//            done if the map fails to load
void stream_convert(EST_Stream &lab, EST_Option &al, EST_Option &op)
{
    if (al.present("-shift"))
	shift_label(lab, al.fval("-shift"));
    
    // fix option later.    
    // tail() is 0 for an empty stream, so there is no end time to scale
    if (al.present("-extend") && (lab.tail() != 0))
	al.override_fval("-length", al.fval("-extend",0) * lab.tail()->end());
    
    // quantize (ie round up or down) label times
    if (al.present("-q"))
	quantize(lab, al.fval("-q"));
    
    if (al.present("-start"))
    {
	if (!al.present("-end"))
	    cerr << "-start option must be used with -end option\n";
	else 
	{
	    // extract() appends to its output while walking its input,
	    // so it must not be given lab as both.  Extract from a copy
	    // into the emptied original instead.
	    EST_Stream whole(lab);
	    lab.clear();
	    // NOTE(review): restores the name in case clear() resets it
	    lab.set_name(whole.name());
	    extract(whole, al.fval("-start"), al.fval("-end"), lab);
	}
    }
    
    if (al.present("-class"))
	convert_to_broad_class(lab, al.val("-class"), op);
    
    else if (al.present("-pos"))
    {
	EST_StrList bclass;
	StringtoStrList(al.val("-lablist"), bclass);
	convert_to_broad(lab, bclass);
    }
    else if (al.present("-sed"))
	edit_labels(lab, al.val("-sed"));
    else if (al.present("-map"))
    {
	EST_Option map;
	if (map.load(al.val("-map")) != format_ok)
	    return;
	label_map(lab, map);
    }
}

// Write the features of every item in stream to cout, one item per
// line: the item name, a tab and a colon, then each feature as
// "<name> <value>; ".
void print_stream_features(EST_Stream &stream)
{
    EST_Stream_Item *item;
    EST_TBI *feat;

    item = stream.head();
    while (item)
    {
	cout << item->name() << "\t:";
	feat = item->f.head();
	while (feat)
	{
	    cout << item->f.fname(feat) << " " 
		<< item->f.fval(feat) << "; ";
	    feat = next(feat);
	}
	cout << endl;
	item = next(item);
    }
    
}


void build_StreamList_hash_table(const EST_StreamList &mlf,
				 EST_TStringHash<EST_Stream*> &hash_table, 
				 const bool base)
{

    EST_Litem *p;
    if (base)
	for (p = mlf.head(); p; p = next(p))
	    hash_table.add_item(basename(mlf(p).name(), "*"),
				&(((EST_TItem<EST_Stream> *)p)->val));
    
    else 
	for (p = mlf.head(); p; p = next(p))
	    hash_table.add_item(mlf(p).name(),
				&(((EST_TItem<EST_Stream> *)p)->val));

}

// Look up the stream for "filename" in hash_table and return a copy of
// it.
//
// The lookup key is filename, reduced with basename(..., "*") when base
// is set.  When the key is not in the table, a message is written to
// cerr and a default constructed EST_Stream is returned.
EST_Stream hashed_StreamList_extract(const EST_TStringHash<EST_Stream*> &hash_table,
				      const EST_String &filename, bool base)
{
    EST_Stream *match;
    EST_String key = filename;
    int found;

    if (base)
	key=basename(filename, "*");

    match=hash_table.val(key,found);

    if(!found)
    {
	cerr << "No match for file " << filename << " found in mlf\n";
	EST_Stream dd;
	return dd;
    }

    return *match;
}

