;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;                                                                       ;;
;;;                Centre for Speech Technology Research                  ;;
;;;                     University of Edinburgh, UK                       ;;
;;;                       Copyright (c) 1996,1997                         ;;
;;;                        All Rights Reserved.                           ;;
;;;                                                                       ;;
;;;  Permission to use, copy, modify, distribute this software and its    ;;
;;;  documentation for research, educational and individual use only, is  ;;
;;;  hereby granted without fee, subject to the following conditions:     ;;
;;;   1. The code must retain the above copyright notice, this list of    ;;
;;;      conditions and the following disclaimer.                         ;;
;;;   2. Any modifications must be clearly marked as such.                ;;
;;;   3. Original authors' names are not deleted.                         ;;
;;;  This software may not be used for commercial purposes without        ;;
;;;  specific prior written permission from the authors.                  ;;
;;;                                                                       ;;
;;;  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        ;;
;;;  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      ;;
;;;  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   ;;
;;;  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     ;;
;;;  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    ;;
;;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   ;;
;;;  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          ;;
;;;  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       ;;
;;;  THIS SOFTWARE.                                                       ;;
;;;                                                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;  Set up rab_diphones using the standard diphone synthesizer
;;;
;;;  Roger diphones: male RP English collected October 1996
;;;

(defvar rab_diphone_dir (cdr (assoc 'rab_diphone voice-locations))
  "rab_diphone_dir
  The default directory for the rab diphone database.")

(require 'mrpa_phones)
(require 'pos)
(require 'phrase)
(require 'tobi)
(require 'f2bf0lr)
(require 'mrpa_durs)
(require 'gswdurtreeZ)

(setup_oald_lex)

;;; You can override the following guesses in your siteinit.scm
;;; This looks for what's available and tries to set that 
;;; its not guaranteed but does its best.  Standard installations
;;; will require no changes
(cond
 ((probe_file (string-append rab_diphone_dir "group/rablpc16k.group"))
    (defvar rab_diphone_type '16k
      "rab_diphone_type
  The default type of rab diphones to use:  20k, 16k or 8k."))
 ((probe_file (string-append rab_diphone_dir "group/rablpc8k.group"))
  (defvar rab_diphone_type '8k
    "rab_diphone_type
  The default type of rab diphones to use:  20k, 16k or 8k."))
 (t
  (defvar rab_diphone_type '8k
    "rab_diphone_type
  The default type of rab diphones to use:  20k, 16k or 8k.")))

(defvar rab_standard_di_db 'rab_lpc
  "rab_standard_di_db
The name of the standard default rab diphone database, rab for pcm
 or rab_lpc (for use with residual excited LPC).")


;;;  The following a diphone database dependent but are sharable between
;;;  the different encodings of the rab diphone db
(set! rab_alternates_before
   '(alternates_before
     ((e@ ei) (@ i) (ll l) (l ll) (ii y) (m= m) (n= n) (l= l)
              (o oo) (a aa)
	      (u uu) (i ii) (i@ ii) (u@ uu) (r @@))))
(set! rab_alternates_after
   '(alternates_after
    ((ll l) (l ll) (y i) (ch t) (jh d) (i y) (d t) (m= m) (n= n) (l= l) 
	    )))
(set! rab_default_diphone '(default_diphone "@-@@"))

(define (rab_diphone_const_clusters utt)
"(rab_diphone_const_clusters UTT)
Identify consonant clusters, dark ls etc in the segment streamitem
ready for diphone resynthesis.  This may be called as a post lexical
rule through poslex_rule_hooks."
  (mapcar
   (lambda (s) (rab_diphone_fix_phone_name utt s))
   (utt.stream utt 'Segment))
  utt)

(define (rab_diphone_fix_phone_name utt seg)
"(rab_diphone_fix_phone_name UTT SEG)
Add the feature diphone_phone_name to given segment with the appropriate
name for constructing a diphone.  Basically adds _ if either side is part
of the same consonant cluster, adds $ either side if in different
syllable for preceding/succeeding vowel syllable, and converts l to ll
in coda part of syllables."
  (let ((newname (streamitem.name seg)))
  (cond
   ((string-equal newname "#") t)
   ((string-equal "-" (utt.streamitem.feat utt seg 'ph_vc))
     ;; Not got a Roger db with these in yet
;    (if (and (string-equal "-" (utt.streamitem.feat utt seg 'p.ph_vc))
;	     (member_string (streamitem.name seg)
;			    '(r w y l m n p t k))
;	     (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
;			   (utt.streamitem.feat utt seg 'p.Syllable.addr)))
;	(set! newname (format nil "_%s" newname)))
;    (if (and (string-equal "-" (utt.streamitem.feat utt seg 'n.ph_vc))
;	     (member_string (streamitem.name seg)
;			    '(p t k b d g s))
;	     (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
;			   (utt.streamitem.feat utt seg 'n.Syllable.addr)))
;	(set! newname (format nil "%s_" newname)))
;    (if (and (string-equal "+" (utt.streamitem.feat utt seg 'p.ph_vc))
;	     (member_string (streamitem.name seg)
;			    '(p t k b d g))
;	     (not (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
;				(utt.streamitem.feat utt seg 'p.Syllable.addr))))
;	(set! newname (format nil "$%s" (streamitem.name seg))))
;    (if (and (string-equal "+" (utt.streamitem.feat utt seg 'n.ph_vc))
;	     (member_string (streamitem.name seg)
;			    '(p t k b d g))
;	     (not (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
;				(utt.streamitem.feat utt seg 'n.Syllable.addr))))
;	(set! newname (format nil "%s$" newname)))
    (if (and (string-equal "l" newname)
	     (string-equal "+" (utt.streamitem.feat utt seg 'p.ph_vc))
	     (string-equal (utt.streamitem.feat utt seg 'Syllable.addr)
			   (utt.streamitem.feat utt seg 'p.Syllable.addr)))
	(set! newname "ll"))
    ))
  (streamitem.set_feat seg "diphone_phone_name" newname)))

(define (setup_rab_lpc_diphone_8k)
"(setup_rab_lpc_diphone_8k)
  Initialise the Roger diphone database.  This sets up the 8K ulaw version
  for residual excited LPC.  This is the smallest version of the Roger
  diphones, and doesn't sound too bad at all."
'(Diphone_Init
 (list
  '(name rab_lpc)
  (list 'index_file (string-append rab_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append rab_diphone_dir "wav/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'lpc_dir (string-append rab_diphone_dir "lpc8k/"))
  '(lpc_ext ".lpc")
  '(lpc_type "htk")
  '(type lpc)
  (list 'pitch_dir (string-append rab_diphone_dir  "pmMLP/"))
  '(samp_freq 8000)
  '(num_diphones 2400)
  '(sig_band 80)
  '(lpc_order 11)
  '(def_f0 100)
  '(group_type ulaw)
  '(phoneset mrpa)
  '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   ))  
(Diphone_Init
  (list
   '(name rab_lpc)
   (list 'group_file (string-append rab_diphone_dir "group/rablpc8k.group"))
   '(samp_freq 8000)
   '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   )))

(define (setup_rab_diphone_8k)
"(setup_rab_diphone_8k)
  Initialise the Roger diphone database.  This sets up the 8K ulaw version
  for PSOLA.  This is the smallest version of the Roger
  diphones, and doesn't sound too bad at all."
  (Diphone_Init
  (list
   '(name rab)
   (list 'group_file (string-append rab_diphone_dir "group/rab8k.group"))
   '(samp_freq 8000)
   '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   )))

(defvar rab_group_or_ungrouped 'group)
(define (setup_rab_lpc_diphone_16k)
"(setup_rab_lpc_diphone_16k)
  Initialise the Roger diphone database.  This sets up the 16K version
  for residual excited LPC."
(if (equal? rab_group_or_ungrouped 'ungrouped)
    (begin
(print 'ungrouped)
(Diphone_Init
 (list
  '(name rab_lpc)
  (list 'index_file (string-append rab_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append rab_diphone_dir "wav/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'lpc_dir (string-append rab_diphone_dir "lpc16k/"))
  '(lpc_ext ".lpc")
  '(lpc_type "htk")
  '(type lpc)
  (list 'pitch_dir (string-append rab_diphone_dir  "pmMLP/"))
  '(samp_freq 16000)
  '(num_diphones 2400)
  '(sig_band 160)
  '(lpc_order 19)
  '(def_f0 100)
  '(phoneset mrpa)
  '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   )))
(Diphone_Init
  (list
   '(name rab_lpc)
   (list 'group_file (string-append rab_diphone_dir "group/rablpc16k.group"))
   '(samp_freq 16000)
   '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   )))
   t)


(define (setup_rab_diphone_16k_group)
"(setup_rab_diphone_16k_group)
  Initialise the Roger diphone database.  This sets up the 16K version
  for PSOLA."
  (Diphone_Init
  (list
   '(name rab)
   (list 'group_file (string-append rab_diphone_dir "group/rab16k.group"))
   '(samp_freq 16000)
   '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   )))

(define (setup_rab_diphone_16k)
"(setup_rab_diphone_16k)
  Initialise the Roger diphone database.  This sets up the 16K version
  (pcm) using grouped dictionary."
(Diphone_Init
 (list
  '(name rab)
;  (list 'index_file (string-append rab_diphone_dir "dic/festival.msec"))
  (list 'index_file (string-append rab_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append rab_diphone_dir "wav/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'pitch_dir (string-append rab_diphone_dir  "pmMLP/"))
  '(samp_freq 16000)
  '(num_diphones 2400)
  '(sig_band 160)
  '(phoneset mrpa)
  '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   )))

(define (make_rab_diphone_8k)
"(make_rab_diphone_8k)
  Make a grouped file for the rab 8k ulaw version of the diphones.  Loads
  the signals from the raw files and dumps them to a binary version for
  fast loading."
(Diphone_Init
 (list
  '(name rab_lpc)
  (list 'index_file (string-append rab_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append rab_diphone_dir "wav/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'lpc_dir (string-append rab_diphone_dir "lpc8k/"))
  '(lpc_ext ".lpc")
  '(lpc_type "htk")
  '(type lpc)
  (list 'pitch_dir (string-append rab_diphone_dir  "pmMLP/"))
  '(samp_freq 8000)
  '(num_diphones 1800)
  '(sig_band 80)
  '(def_f0 100)
  '(lpc_order 11)
  '(phoneset mrpa)
  '(group_type ulaw)
  '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   ))
 (format t "Loaded diphones -- now writing group file\n")
 (Diphone.group 'rab_lpc (string-append rab_diphone_dir "group/rablpc8k.group")))

(define (make_rab_pcm_diphone_8k)
"(make_rab_pcm_diphone_8k)
  Make a grouped file for the rab 8k ulaw version of the diphones.  Loads
  the signals from the raw files and dumps them to a binary version for
  fast loading."
(Diphone_Init
 (list
  '(name rab)
  (list 'index_file (string-append rab_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append rab_diphone_dir "wav8k/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'pitch_dir (string-append rab_diphone_dir  "pmMLP/"))
  '(samp_freq 8000)
  '(num_diphones 1800)
  '(sig_band 160)
  '(def_f0 100)
  '(phoneset mrpa)
  '(group_type ulaw)
  '(access_type ondemand)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   ))
 (format t "Loaded diphones -- now writing group file\n")
 (Diphone.group 'rab (string-append rab_diphone_dir "group/rab8k.group")))

(define (make_rab_diphone_16k)
"(make_rab_diphone_16k)
  Make a grouped file for the rab 16k full version of the diphones.  Loads
  the signals from the raw files and dumps them to a binary version for
  fast loading."
 (Diphone_Init
 (list
  '(name rab)
  (list 'index_file (string-append rab_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append rab_diphone_dir "wav/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'pitch_dir (string-append rab_diphone_dir "pmMLP/"))
  '(samp_freq 16000)
  '(num_diphones 1800)
  '(def_f0 100)
  '(sig_band 320)   ;; PSOLA(TM) needs pitch mark size window either side
  '(phoneset mrpa)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   ))
 (format t "Loaded diphones -- now writing group file\n")
 (Diphone.group 'rab (string-append rab_diphone_dir "group/rab16k.group")))

(define (make_rab_lpc_diphone_16k)
"(make_rab_lpc_diphone_16k)
  Make a grouped file for the rab 16k LPC version of the diphones.  Loads
  the signals from the raw files and dumps them to a binary version for
  fast loading."
 (Diphone_Init
 (list
  '(name rab_lpc)
  (list 'index_file (string-append rab_diphone_dir "dic/diphdic.msec"))
  (list 'signal_dir (string-append rab_diphone_dir "wav/"))
  '(signal_ext ".wav")
  '(signal_type "nist")
  (list 'lpc_dir (string-append rab_diphone_dir "lpc16k/"))
  '(lpc_ext ".lpc")
  '(lpc_type "htk")
  '(type lpc)
  (list 'pitch_dir (string-append rab_diphone_dir "pmMLP/"))
  '(samp_freq 16000)
  '(num_diphones 1800)
  '(sig_band 160)
  '(phoneset mrpa)
  '(def_f0 100)
  '(lpc_order 19)
   rab_alternates_before
   rab_alternates_after
   rab_default_diphone
   ))
 (format t "Loaded diphones -- now writing group file\n")
 (Diphone.group 'rab_lpc (string-append rab_diphone_dir "group/rablpc16kP.group")))

(define (setup_rab_tay_diphone_16k)
"(setup_rab_tay_diphone)
  Initialise the Roger diphone database for use with Taylor CSTR diphone
  synthesizer."
  (Taylor_Init
   (list
     '(SPEAKER rab)
     '(SAMPLE_RATE 16000)
     '(PHONESET mrpa)
     (list 'ALL_PATH rab_diphone_dir)
     (list 'VOX_PATH (string-append rab_diphone_dir "wav/"))
     (list 'PM_PATH (string-append rab_diphone_dir "pm/"))
     (list 'DIC_NAME 
	   (string-append rab_diphone_dir "dic/diphdic.msec"))
     '(MAX_DIPHONES 2400)
     '(AVAILABLE_DIPHONES 2400)
     '(DIPHONE_TYPE WAVEFORM)
     '(DIPHONE_STORAGE SEPARATE)
     '(DIPHONE_OFFSET 400)
     '(VOX_HEADER_SIZE 1024)
     '(BIT_SIZE 16))))

;; You should set rab_diphone_type in siteinit.scm if you're setup
;; is different from the default

(if (eq rab_standard_di_db 'rab_lpc)   ;; lpc for residual excited
  (if (eq rab_diphone_type '8k)
      (setup_rab_lpc_diphone_8k)
      (setup_rab_lpc_diphone_16k)))
(if (eq rab_standard_di_db 'rab)       ;; pcm
    (if (eq rab_diphone_type '8k)
	(setup_rab_diphone_8k)
	(setup_rab_diphone_16k_group)))

(define (voice_rab_diphone)
"(voice_rab_diphone)
 Set up the current voice to be a British male RP (Roger) speaker using
 the rab diphone set."
  (voice_reset)
  (Parameter.set 'Language 'britishenglish)
  ;; Phone set
  (Parameter.set 'PhoneSet 'mrpa)
  (PhoneSet.select 'mrpa)
  ;; Tokenization rules
  (set! token_to_words english_token_to_words)
  ;; POS tagger
  (set! pos_lex_name "english_poslex")
  (set! pos_ngram_name 'english_pos_ngram)
  (set! pos_supported t)
  (set! guess_pos english_guess_pos)   ;; need this for accents
  ;; Lexicon selection
  (lex.select "oald")
  (set! postlex_rules_hooks (list postlex_apos_s_check))
  ;; Phrase prediction
  (Parameter.set 'Phrase_Method 'prob_models)
  (set! break_pos_ngram_name 'english_break_pos_ngram)
  (set! break_ngram_name 'english_break_ngram)
  (set! phrase_type_tree english_phrase_type_tree)
  ;; Accent and tone prediction
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)
  ;; F0 prediction
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  (Parameter.set 'Int_Method Intonation_Tree)
  (set! int_lr_params
	'((target_f0_mean 100) (target_f0_std 15)
	  (model_f0_mean 170) (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)
  ;; Duration prediction -- use gsw durations
  (set! duration_cart_tree gsw_duration_cart_tree)
  (set! duration_ph_info gsw_durs)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  (Parameter.set 'Duration_Stretch 0.95)
  ;; Waveform synthesizer: Roger diphones
  (Parameter.set 'Synth_Method Diphone_Synthesize)
  ;; This assigned the diphone names from their context (_ $ etc)
  (set! diphone_module_hooks (list rab_diphone_const_clusters ))
  (Diphone.select rab_standard_di_db)

  (set! current-voice 'rab_diphone)
)

(define (voice_rab_pcm_diphone)
  (voice_rab_diphone)
  (if (not (member 'rab (Diphone.list)))
      (cond
       ((eq rab_diphone_type '8k)
	  (setup_rab_diphone_8k))
       ((eq rab_diphone_type '16k)
	  (setup_rab_diphone_16k_group))
       (t
	(error "No pcm diphone db for rab at request sample rate"))))
  (Diphone.select 'rab)

  'rab_pcm_diphone
  (set! current-voice 'rab_pcm_diphone)
)

(proclaim_voice
 'rab_diphone
 '((language english)
   (gender male)
   (dialect british)
   (description
    "This voice provides a British RP English male voice using a
     residual excited LPC diphone synthesis method.  It uses a 
     modified Oxford Advanced Learners' Dictionary for pronunciations.
     Prosodic phrasing is provided by a statistically trained model
     using part of speech and local distribution of breaks.  Intonation
     is provided by a CART tree predicting ToBI accents and an F0 
     contour generated from a model trained from natural speech.  The
     duration model is also trained from data using a CART tree.")))

(provide 'rab_diphone)
