;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                       ;;
;;;                Centre for Speech Technology Research                  ;;
;;;                     University of Edinburgh, UK                       ;;
;;;                         Copyright (c) 1997                            ;;
;;;                        All Rights Reserved.                           ;;
;;;                                                                       ;;
;;;  Permission to use, copy, modify, distribute this software and its    ;;
;;;  documentation for research, educational and individual use only, is  ;;
;;;  hereby granted without fee, subject to the following conditions:     ;;
;;;   1. The code must retain the above copyright notice, this list of    ;;
;;;      conditions and the following disclaimer.                         ;;
;;;   2. Any modifications must be clearly marked as such.                ;;
;;;   3. Original authors' names are not deleted.                         ;;
;;;  This software may not be used for commercial purposes without        ;;
;;;  specific prior written permission from the authors.                  ;;
;;;                                                                       ;;
;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
;;;  THIS SOFTWARE.                                                       ;;
;;;                                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;  Set up kd_diphones using the standard diphone synthesizer
;;;
;;;  Kurt diphones: male American English collected Spring 1997
;;;

(defvar kd_diphone_dir (cdr (assoc 'kd_diphone voice-locations))
  "kd_diphone_dir
  The default directory for the kd diphone database.")
(set! load-path (cons (string-append kd_diphone_dir "festvox/") load-path))

(require 'radio_phones)

;;;  The following a diphone database dependent but are sharable between
;;;  the different encodings of the kd diphone db
(set! kd_alternates_before
   '(alternates_before
     ((pau sil) (hh h) (axr er) (aor ao) (ll l) (ih iy) (ah aa) (er r))))
(set! kd_alternates_after
   '(alternates_after
     ((pau sil) (hh h) (ch t) (jh d) (axr er) (aor ao) (ll l))))
(set! kd_default_diphone '(default_diphone "sil-sil"))

;;;;
;;;;  Our general diphone scheme allows identification of consonant
;;;   clusters etc the follow rules should work for American English
;;;;
(define (kd_diphone_const_clusters utt)
"(kd_diphone_const_clusters UTT)
Identify consonant clusters, dark ls etc in the segment stream
ready for diphone resynthesis.  This may be called as a post lexical
rule through poslex_rule_hooks."
  (mapcar
   (lambda (s) (kd_diphone_fix_phone_name utt s))
   (utt.stream utt 'Segment))
  utt)

(define (kd_diphone_fix_phone_name utt seg)
"(kd_diphone_fix_phone_name UTT SEG)
Add the feature diphone_phone_name to given segment with the appropriate
name for constructing a diphone.  Basically adds _ if either side is part
of the same consonant cluster, adds $ either side if in different
syllable for preceding/succeeding vowel syllable, and converts l to ll
 in coda part of syllables."
  (let ((newname (streamitem.name seg)))
  (cond
   ((string-equal newname "pau") t)
   ((string-equal "-" (utt.streamitem.feat utt seg 'ph_vc))
    (if (and (string-equal "-" (utt.streamitem.feat utt seg 'p.ph_vc))
	     (member_string (streamitem.name seg)
			    '(r w y l m n p t k))
	     (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
			   (utt.streamitem.feat utt seg 'p.Syllable.addr)))
	(set! newname (format nil "_%s" newname)))
    (if (and (string-equal "-" (utt.streamitem.feat utt seg 'n.ph_vc))
	     (member_string (streamitem.name seg)
			    '(p t k b d g s))
	     (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
			   (utt.streamitem.feat utt seg 'n.Syllable.addr)))
	(set! newname (format nil "%s_" newname)))
;    (if (and (string-equal "+" (utt.streamitem.feat utt seg 'p.ph_vc))
;	     (member_string (streamitem.name seg)
;			    '(p t k b d g))
;	     (not (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
;				(utt.streamitem.feat utt seg 'p.Syllable.addr))))
;	(set! newname (format nil "$%s" (streamitem.name seg))))
;    (if (and (string-equal "+" (utt.streamitem.feat utt seg 'n.ph_vc))
;	     (member_string (streamitem.name seg)
;			    '(p t k b d g))
;	     (not (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
;				(utt.streamitem.feat utt seg 'n.Syllable.addr))))
;	(set! newname (format nil "%s$" newname)))
    (if (and (string-equal "l" newname)
	     (string-equal "+" (utt.streamitem.feat utt seg 'p.ph_vc))
	     (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
			   (utt.streamitem.feat utt seg 'p.Syllable.addr)))
	(set! newname "ll"))
    )
   ((and (string-equal "ao" (streamitem.name seg))
	 (string-equal "r" (utt.streamitem.feat utt seg 'n.name)))
    (set! newname "aor")))
  (streamitem.set_feat seg "diphone_phone_name" newname)))

(define (setup_kd_diphone_16k)
"(setup_kd_diphone)
  Initialise the Kurt diphone database.  This sets up the 16K version
  (pcm) using grouped dictionary."
(print "kd 16k pcm ungrouped")
(Diphone_Init
 (list
  '(name kd)
  (list 'index_file (string-append kd_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append kd_diphone_dir "wavN/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'pitch_dir (string-append kd_diphone_dir  "pm/"))
  '(samp_freq 16000)
  '(num_diphones 2400)
  '(sig_band 160)
  '(phoneset radio)
  '(access_type ondemand)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone
   )))

(define (setup_kd_diphone_16k_grouped)
"(setup_kd_diphone_16k_grouped)
  Initialise the Kurt diphone database.  This sets up the 16K version
  (pcm) using grouped dictionary."
(Diphone_Init
 (list
  '(name kd)
   (list 'group_file (string-append kd_diphone_dir "group/kd16k.group"))
   '(samp_freq 16000)
   '(access_type ondemand)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone)))

(define (setup_kd_diphone_lpc16k_grouped)
"(setup_kd_diphone_lpc16k_grouped)
  Initialise the Kurt diphone database.  This sets up the 16K version
  (lpc) using grouped dictionary."
(Diphone_Init
 (list
  '(name kd)
   (list 'group_file (string-append kd_diphone_dir "group/kdlpc16k.group"))
   '(samp_freq 16000)
   '(access_type ondemand)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone)))

(define (setup_kd_diphone_lpc8k_grouped)
"(setup_kd_diphone_lpc8k_grouped)
  Initialise the Kurt diphone database.  This sets up the 8K version
  (lpc) using grouped dictionary."
(Diphone_Init
 (list
  '(name kd)
   (list 'group_file (string-append kd_diphone_dir "group/kdlpc8k.group"))
   '(samp_freq 8000)
   '(access_type ondemand)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone)))

(define (setup_kd_diphone_lpc_16k)
"(setup_kd_diphone_lpc_16k)
  Initialise the Kurt diphone database.  This sets up the 16K version
  (lpc) using ungrouped dictionary."
(print "kd lpc 16k ungrouped")
(Diphone_Init
 (list
  '(name kd)
  (list 'index_file (string-append kd_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append kd_diphone_dir "wavN/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'lpc_dir (string-append kd_diphone_dir "lpc16k/"))
  '(lpc_ext ".lpc")
  '(lpc_type "htk")
  '(lpc_res_type "nist")
  '(lpc_frame_offset 0)
  '(lpc_res_offset 0.0)
  '(lpc_res_ext ".res")
  '(type lpc)
  (list 'pitch_dir (string-append kd_diphone_dir  "pm/"))
  '(samp_freq 16000)
  '(num_diphones 2400)
  '(sig_band 160)
  '(lpc_order 19)
  '(def_f0 110)
  '(phoneset radio)
  '(access_type ondemand)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone
   )))

(define (make_kd_diphone_16k)
"(make_kd_diphone_16k)
  Make a grouped file for the kd 16k full version of the diphones.  Loads
  the signals from the raw files and dumps them to a binary version for
  fast loading."
 (Diphone_Init
 (list
  '(name kd)
  (list 'index_file (string-append kd_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append kd_diphone_dir "wavN/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'pitch_dir (string-append kd_diphone_dir "pm/"))
  '(samp_freq 16000)
  '(num_diphones 2100)
  '(def_f0 100)
  '(sig_band 160)   ;; PSOLA(TM) needs pitch mark size window either side
  '(phoneset radio)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone
   ))
 (format t "Loaded diphones -- now writing group file\n")
 (Diphone.group 'kd (string-append kd_diphone_dir "group/kd16k.group")))

(define (make_kd_diphone_lpc16k)
"(make_kd_diphone_16k)
  Make a grouped file for the kd 16k lpc version of the diphones.  Loads
  the signals from the raw files and dumps them to a binary version for
  fast loading.  Uses LPCs from lpc_analysis"
(Diphone_Init
 (list
  '(name kd)
  (list 'index_file (string-append kd_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append kd_diphone_dir "wavN/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'lpc_dir (string-append kd_diphone_dir "lpc16k/"))
  '(lpc_ext ".lpc")
  '(lpc_type "htk")
  '(lpc_res_type "nist")
  '(lpc_frame_offset 0)
  '(lpc_res_offset 0.0)
;  (list 'lpc_dir (string-append kd_diphone_dir "lpc16k-esps/"))
;  '(lpc_ext ".lpc")
;  '(lpc_type "htk")
;  '(lpc_res_type "esps")
;  '(lpc_frame_offset 1)
;  '(lpc_res_offset 0.01)
  '(type lpc)
  (list 'pitch_dir (string-append kd_diphone_dir  "pm/"))
  '(samp_freq 16000)
  '(num_diphones 2400)
  '(sig_band 160)
  '(lpc_order 19)
  '(def_f0 110)
  '(phoneset radio)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone
   ))
 (format t "Loaded diphones -- now writing group file\n")
 (Diphone.group 'kd (string-append kd_diphone_dir "group/kdlpc16k.group")))

(define (make_kd_diphone_lpc8k)
"(make_kd_diphone_8k)
  Make a grouped file for the kd 8k lpc version of the diphones.  Loads
  the signals from the raw files and dumps them to a binary version for
  fast loading.  Uses LPCs from lpc_analysis"
(Diphone_Init
 (list
  '(name kd)
  ;; don't include syllabic consonants or $ stuff
  (list 'index_file (string-append kd_diphone_dir "dic/diphdic.msec.small"))
  (list 'signal_dir (string-append kd_diphone_dir "wavN/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'lpc_dir (string-append kd_diphone_dir "lpc8k/"))
  '(lpc_ext ".lpc")
  '(lpc_type "htk")
  '(lpc_res_type "nist")
  '(lpc_frame_offset 0)
  '(lpc_res_offset 0.0)
  '(type lpc)
  (list 'pitch_dir (string-append kd_diphone_dir  "pm/"))
  '(samp_freq 8000)
  '(group_type ulaw)
  '(num_diphones 1700)
  '(sig_band 80)
  '(lpc_order 11)
  '(def_f0 110)
  '(phoneset radio)
   kd_alternates_before
   kd_alternates_after
   kd_default_diphone
   ))
 (format t "Loaded diphones -- now writing group file\n")
 (Diphone.group 'kd (string-append kd_diphone_dir "group/kdlpc8k.group")))


;;;  Set up the CMU lexicon
(setup_cmu_lex)

;;;  Select sample rate based on what is installed
(cond
 ((probe_file (string-append kd_diphone_dir "group/kdlpc16k.group"))
  (defvar kd_diphone_type 'kd_lpc16k_grouped))
 ((probe_file (string-append kd_diphone_dir "group/kdlpc8k.group"))
  (defvar kd_diphone_type 'kd_lpc8k_grouped))
 (t
  (defvar kd_diphone_type 'kd_lpc16k_grouped)))

(if (equal? kd_diphone_type 'kd_lpc16k_grouped)
    (setup_kd_diphone_lpc16k_grouped))
(if (equal? kd_diphone_type 'kd_lpc8k_grouped)
    (setup_kd_diphone_lpc8k_grouped))
(if (equal? kd_diphone_type 'kd_lpc_ungrouped)
    (setup_kd_diphone_lpc_16k))
(if (equal? kd_diphone_type 'kd_grouped)
    (setup_kd_diphone_16k_grouped))
(if (equal? kd_diphone_type 'kd_ungrouped)
    (setup_kd_diphone_16k))

(load (string-append kd_diphone_dir "festvox/kddurtreeZ.scm"))

(define (voice_kd_diphone)
"(voice_kd_diphone)
 Set up the current voice to be male  American English (Kurt) using
 the standard diphone corpus."
  ;; Phone set
  (voice_reset)
  (Parameter.set 'Language 'americanenglish)
  (require 'radio_phones)
  (Parameter.set 'PhoneSet 'radio)
  (PhoneSet.select 'radio)
  ;; Tokenization rules
  (set! token_to_words english_token_to_words)
  ;; POS tagger
  (require 'pos)
  (set! pos_lex_name "english_poslex")
  (set! pos_ngram_name 'english_pos_ngram)
  (set! pos_supported t)
  (set! guess_pos english_guess_pos)   ;; need this for accents
  ;; Lexicon selection
  (lex.select "cmu")
  (set! postlex_rules_hooks (list postlex_apos_s_check))
  ;; Phrase prediction
  (require 'phrase)
  (Parameter.set 'Phrase_Method 'prob_models)
  (set! break_pos_ngram_name 'english_break_pos_ngram)
  (set! break_ngram_name 'english_break_ngram)
  ;; Accent and tone prediction
  (require 'tobi)
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)

  (set! postlex_vowel_reduce_cart_tree 
	postlex_vowel_reduce_cart_data)
  ;; F0 prediction
  (require 'f2bf0lr)
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  (Parameter.set 'Int_Method Intonation_Tree)
  (set! int_lr_params
	'((target_f0_mean 110) (target_f0_std 15)
	  (model_f0_mean 170) (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)
  ;; Duration prediction
  (set! duration_cart_tree kd_duration_cart_tree)
  (set! duration_ph_info kd_durs)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 1.00)
  ;; Waveform synthesizer: kd diphones
  (Parameter.set 'Synth_Method Diphone_Synthesize)
  ;; This assigned the diphone names from their context (_ $ etc)
  (set! diphone_module_hooks (list kd_diphone_const_clusters ))
  (Diphone.select 'kd)

  (set! current-voice 'kd_diphone)
)

(proclaim_voice
 'kd_diphone
 '((language english)
   (gender male)
   (dialect american)
   (description
    "This voice provides an American English male voice using a
     residual excited LPC diphone synthesis method.  It uses a 
     tme CMU Lexicon pronunciations.  Prosodic phrasing is provided 
     by a statistically trained model using part of speech and local 
     distribution of breaks.  Intonation is provided by a CART tree 
     predicting ToBI accents and an F0 contour generated from a model 
     trained from natural speech.  The duration model is also trained 
     from data using a CART tree.")))

(provide 'kd_diphone)
