// Copyright (C) 2002 Samy Bengio (bengio@idiap.ch)
//                
//
// This file is part of Torch. Release II.
// [The Ultimate Machine Learning Library]
//
// Torch is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// Torch is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Torch; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

#ifndef SPEECH_HMM_INC
#define SPEECH_HMM_INC

#include "HMM.h"
#include "Dictionary.h"
#include "Grammar.h"
#include "EditDistance.h"
#include "log_add.h"
#include "EMTrainer.h"

namespace Torch {

/** This class implements a special case of Hidden Markov Models that
    can be used to do connected word speech recognition for small
    vocabulary, using embedded training.

    It contains a set of phoneme models (represented by HMMs), a dictionary
    of words (which are sequences of phonemes) and a grammar (which states
    the legal sentences of the language).

    The decoding is done by creating the whole transition matrix
    and hence is not adapted to large vocabulary problems.

    NOTE(review): this header only declares the interface. Who owns the
    pointers handed to the constructor (models, names, dictionary, grammar,
    trainer) is not visible here -- confirm in the implementation file.

    @author Samy Bengio (bengio@idiap.ch)
*/
class SpeechHMM : public HMM
{
  public:
    /// the number of basic phoneme models (number of entries in #models#)
    int n_models;
    /// the basic phoneme models
    HMM** models;

    /// for each model, a unique name (used for translation)
    char** model_names;

    /** optional trainer (defaults to NULL in the constructor).
        If an initial alignment is given and an emtrainer for each model
        then it is used to train the models after kmeans during reset.
    */
    EMTrainer* model_trainer;

    /// the acceptable dictionary, using the indices instead of the names
    Dictionary* dict;

    /// the acceptable grammar
    Grammar* grammar;

    /** word entrance penalty: during viterbi, penalizes large sentences.
        The constructor default is #LOG_ONE#.
    */
    real word_entrance_penalty;

    /// true if the given transition is a transition between words
    bool **word_transitions;

    /// the maximum number of states in the graph (used for allocation)
    int max_n_states;

    /// the relation between model states and SpeechHMM states
    int* states_to_model_states;

    /// the relation between models and SpeechHMM states
    int* states_to_model;

    /// the relation between words and SpeechHMM states
    int* states_to_word;

    /// the word sequence corresponding to the state sequence
    int* word_sequence;
    /// presumably the length of #word_sequence# (by analogy with
    /// #target_word_sequence_size#) -- TODO confirm in the implementation
    int word_sequence_size;

    /// the target word sequence
    int* target_word_sequence;
    /// the length of the target word sequence
    int target_word_sequence_size;
    /// the length of the longest target word sequence
    int target_word_sequence_max_size;

    /// this object is used to compute the decoding error
    EditDistance* edit_distance;

    /** In order to create a SpeechHMM, we need to give a vector of #n_models_#
        #HMM#s as well as their corresponding names, a dictionary and a grammar,
        an optional word_entrance_penalty and an optional trainer that can be
        used to initialize each model independently.

        @param n_models_ the number of phoneme models in #models_#
        @param models_ the phoneme models
        @param model_names_ one name per model
        @param dict_ the dictionary of words
        @param grammar_ the grammar of legal sentences
        @param word_entrance_penalty_ optional penalty, #LOG_ONE# by default
        @param model_trainer_ optional per-model trainer, NULL by default
    */
    SpeechHMM(int n_models_, HMM **models_, char** model_names_, Dictionary* dict_, Grammar* grammar_, real word_entrance_penalty_ = LOG_ONE, EMTrainer* model_trainer_ = NULL);

    /** The methods below are only declared in this header; their behaviour
        is defined in the implementation file.
        NOTE(review): their names suggest they specialize hooks inherited
        from #HMM# (or one of its bases) -- confirm against HMM.h.
    */

    /// (re)initialization; per the #model_trainer# note, the optional
    /// per-model training after kmeans takes place during this call
    virtual void reset();
    /// returns a parameter count -- presumably summed over the phoneme
    /// models; TODO confirm
    virtual int numberOfParams();
    /// allocation / release of the internal structures (counterparts)
    virtual void allocateMemory();
    virtual void freeMemory();
    /// presumably read / write the model through an already opened #FILE#;
    /// the on-disk format is not visible from this header
    virtual void loadFILE(FILE *file);
    virtual void saveFILE(FILE *file);

    /// initialization hooks: per iteration, per EM iteration and per
    /// sequence (naming-based reading -- TODO confirm call order in HMM)
    virtual void iterInitialize();
    virtual void eMIterInitialize();
    virtual void eMSequenceInitialize(List* inputs);
    virtual void sequenceInitialize(List* inputs);
    /// accumulation of posteriors for a sequence given its log posterior,
    /// in the EM and in the viterbi setting respectively (presumably)
    virtual void eMAccPosteriors(List *inputs, real log_posterior);
    virtual void viterbiAccPosteriors(List *inputs, real log_posterior);
    /// EM update step (presumably applied after accumulation)
    virtual void eMUpdate();

    /// viterbi pass on one sequence example (details in the implementation)
    virtual void logViterbi(SeqExample* ex);

    /** this method computes the sentence associated to the input.
        NOTE(review): it is declared #void#, so the sentence is not returned
        directly; presumably it is left in #word_sequence# /
        #word_sequence_size# -- confirm in the implementation.
    */
    virtual void decode(List* input);

    /** this method prepares the transition graph associated with a
        given training sentence
    */
    virtual void prepareTrainModel(List* input);

    /** this method prepares the transition graph associated with a
        given test sentence
    */
    virtual void prepareTestModel(List* input);

    /** this method is used by #prepareTrainModel# and #prepareTestModel#
        to prepare the model. It adds a given word to the current graph.
        NOTE(review): the meaning of the returned int is not visible here;
        presumably the state index following the added word -- confirm.
    */
    virtual int addWordToModel(int word, int current_state);

    /** this method is used by #prepareTrainModel# and #prepareTestModel#
        to prepare the model. It adds the connections between words.
        NOTE(review): #log_n_next# looks like the log of the number of
        possible next words -- confirm against the implementation.
    */
    virtual void addConnectionsBetweenWordsToModel(int word,int next_word, int current_state,int next_current_state, real log_n_next);

    /// this method reallocates the structure to accommodate a new sequence
    virtual void realloc(int n_frames, int n_states_);

    /// this method returns the number of states in the grammar
    virtual int nStatesInGrammar();
    /// this method returns the number of states in a given word
    virtual int nStatesInWord(int word);

    /// backward pass over the inputs; NOTE(review): the role of #alpha#
    /// is not documented in this header -- see the HMM implementation
    virtual void backward(List *inputs, real *alpha);

    virtual ~SpeechHMM();
};


}

#endif
