// www.pudn.com > OS.rar > svtts.h, change:2006-10-20,size:19080b
/*
**
** SoftVoice Text-to-Speech API includes file
** X40 Platform
**
** This file contains the constants and structures necessary to
** utilize the SoftVoice Text-to-Speech system and its various API
** functions. For details on the functions and their parameters,
** see the X40 API reference document.
**
**
** Copyright (c) 1993-2004 SoftVoice, Inc. All rights reserved.
**
** SoftVoice Confidential.
**
*/
////////////////////////////// Read Me ///////////////////////////////////////////
//
// This header file is the interface to the ETTS (X40) library.
// Note that the ETTS library is built as a near-pointer library.
// The system uses this header to implement type casting from far to near pointers.
// The library can be used from a near-pointer API as well.
// Every pointer in the ETTS library MUST be a 16-bit variable.
// All pointer types are therefore defined as 16-bit short integers, and their type names start with P.
// For example, PSTR means "a near pointer to a string", and PSTR is defined as a 16-bit short integer.
//
// 2005.1.11
// Jacky Lu, SA3, Generalplus
//////////////////////////////////////////////////////////////////////////////////
#ifndef SVOICE_TTS_H
#define SVOICE_TTS_H
// Basic type aliases.
// Kept mainly so that older Win16/Win32 code keeps compiling.
//
// The P-prefixed aliases are 16-bit integers, not real C pointers.
// NOTE(review): #ifndef only sees macros; a typedef of the same name that
// was declared earlier is not detected by these guards.

#ifndef FAR
#define FAR                         // expands to nothing (added by chengye 2006/10/20)
#endif

#ifndef PVOID
typedef short           PVOID;
#endif

#ifndef HWND
typedef PVOID           HWND;
#endif

#ifndef PSTR
typedef unsigned short  PSTR;       // unsigned, unlike the other P* aliases (modified by chengye 2006/10/20)
#endif

#ifndef PPSTR
typedef short           PPSTR;
#endif

#ifndef DWORD
typedef long            DWORD;      // signed long here
#endif

#ifndef LONG
typedef long            LONG;
#endif

#ifndef ULONG
typedef unsigned long   ULONG;
#endif

#ifndef PULONG
typedef short           PULONG;
#endif

#ifndef UWORD
typedef unsigned short  UWORD;
#endif

// Disabled in the original source:
//#ifndef UINT
//typedef unsigned int UINT;
//#endif

#ifndef PINT
typedef short           PINT;
#endif

#ifndef UBYTE
typedef unsigned char   UBYTE;
#endif

#ifndef PUBYTE
typedef short           PUBYTE;
#endif

#ifndef SHORT
typedef short           SHORT;
#endif

#ifndef LPSTRETTS                   // added by chengye 2006/10/20
typedef char FAR       *LPSTRETTS;  // the only alias here that is a real C pointer
#endif
// Event message identifiers that SoftVoice sends to the user application.
// The API Reference Document describes each event in detail.
// Events are not supported on the X40 platform.
#define sv_EVENT_SPEECH_STARTED      1000  // speaking has begun
#define sv_EVENT_SPEECH_DONE         1001  // speaking has finished
#define sv_EVENT_NEW_SENTENCE        1002  // start of sentence
#define sv_EVENT_ENGLISH_WORD_START  1003  // start of word (in English)
#define sv_EVENT_PHON_WORD_START     1004  // start of word (in phonetics)
#define sv_EVENT_SYLLABLE_START      1005  // start of syllable
#define sv_EVENT_PHONEME_START       1006  // start of phoneme
#define sv_EVENT_USER_SYNC           1007  // user-specified event
#define sv_EVENT_MOUTH_SHAPE         1008  // new mouth shape
#define sv_EVENT_BUFFER_READY        1009  // PCM buffer available
#define sv_EVENT_END_OF_SENTENCE     1010  // end of sentence
// Language selection flags.
// Pass these in the ulFlags field of the SVOpenSpeech API call; several
// languages may be requested in one call by OR-ing the flags together.
// Not every language defined here is currently implemented.
#define sv_LANGUAGE_ENGLISH   0x00000001  // the default language
#define sv_LANGUAGE_SPANISH   0x00000002
#define sv_LANGUAGE_GERMAN    0x00000004
#define sv_LANGUAGE_FRENCH    0x00000008
#define sv_LANGUAGE_JAPANESE  0x00000010
#define sv_LANGUAGE_ITALIAN   0x00000020
#define sv_LANGUAGE_DUTCH     0x00000040

// Sampling-rate and sample-format flags.
#define sv_FLAGS_22KHZ        0x08000000
#define sv_FLAGS_11KHZ        0x04000000  // the default sampling rate
#define sv_FLAGS_8KHZ         0x02000000
#define sv_FLAGS_MULAW        0x01000000  // set for 8-bit mu-law coding
#define sv_FLAGS_16BIT        0x00800000  // the default sample size
#define sv_FLAGS_8BIT         0x00400000
// Option flags for the SVNarrate and SVTTS calls.
// All of them act on the phonetic text, not on the English text.
// See the API Reference Document for details.
// Note: none of these options is supported on the X40 platform.
#define sv_OPT_WORD_START       0x0001  // generate word start events
#define sv_OPT_SYLLABLE_START   0x0002  // generate syllable start events
#define sv_OPT_PHONEME_START    0x0004  // generate phoneme start events
#define sv_OPT_MOUTH_SHAPE      0x0008  // generate mouth shape events
#define sv_OPT_WORDBYWORD       0x0010  // speak each word in isolation
#define sv_OPT_OVERRIDE         0x0020  // abort in-progress requests and start this request
#define sv_OPT_NOSYNCS          0x0040  // don't generate event messages
#define sv_OPT_IGNORE_EMBCMDS   0x0080  // ignore embedded commands
#define sv_OPT_WRITE_WAVFILE    0x0100  // write out a .WAV file
#define sv_OPT_BLOCK            0x0200  // block until speech has completed
#define sv_OPT_CALLBACK         0x0400  // send PCM data via a callback function
// Translation-mode flags for the SVTextToPhon and SVTTS calls.
// They direct how SVTextToPhon turns English text into phonetic codes.
// See the API Reference Document for details.
// Note: not all of these options are supported on the X40 platform.
#define sv_TMODE_NATURAL          0x0001  // translate normally (default)
#define sv_TMODE_WORDBYWORD       0x0002  // do word-by-word translation
#define sv_TMODE_SPELL            0x0004  // spell each word
#define sv_TMODE_WORD_SYNC        0x0008  // insert word sync commands (not on X40)
#define sv_TMODE_READ_EMBCMDS     0x0010  // read embedded commands out loud
#define sv_TMODE_IGNORE_EMBCMDS   0x0020  // ignore embedded commands
#define sv_TMODE_IGNORE_QM        0x0040  // ignore question marks
#define sv_TMODE_RETURN_PHONBFR   0x0080  // return phonetics buffer (SVTTS call only; not on X40)
// Flags for the SVSetWAVFile call; they control how the .WAV file is accessed.
// See the API Reference Document for details.
// Note: .WAV files are not supported on the X40 platform.
#define sv_WAV_NEWFILE     0x00000001  // create a new .WAV file
#define sv_WAV_OVERWRITE   0x00000002  // overwrite the .WAV file
#define sv_WAV_APPEND      0x00000004  // append to the .WAV file
// Error return codes.
// SVGetErrorText (q.v.) may be used to obtain a text description of an error.
// Every code except sv_ERR_NOERROR is an offset from sv_ERR_BASE.
#define sv_ERR_BASE             7000
#define sv_ERR_NOERROR          0                     // no error

#define sv_ERR_PARSE            (sv_ERR_BASE + 1)     // parse error
#define sv_ERR_NOMEM            (sv_ERR_BASE + 2)     // can't allocate memory
#define sv_ERR_NOCOEFMEM        (sv_ERR_BASE + 3)     // can't allocate memory (coef)
#define sv_ERR_NOWAVEHDRMEM     (sv_ERR_BASE + 4)     // can't allocate memory (whdr)
#define sv_ERR_NOWAVEMEM        (sv_ERR_BASE + 5)     // can't allocate memory (wmem)
#define sv_ERR_CANTLOCK         (sv_ERR_BASE + 6)     // can't lock memory
#define sv_ERR_CANTPREPHDR      (sv_ERR_BASE + 7)     // can't prepare header
#define sv_ERR_CANTWRITE        (sv_ERR_BASE + 8)     // waveout write error
#define sv_ERR_INVALIDHANDLE    (sv_ERR_BASE + 9)     // bad speech handle
#define sv_ERR_OUTOFRANGE       (sv_ERR_BASE + 10)    // parameter out of range
#define sv_ERR_AUDIODEVS        (sv_ERR_BASE + 11)    // no waveout devices
#define sv_ERR_AUDIOFMT         (sv_ERR_BASE + 12)    // bad audio format
#define sv_ERR_AUDIOOPEN        (sv_ERR_BASE + 13)    // waveout open failed
#define sv_ERR_SPEECHBUSY       (sv_ERR_BASE + 14)    // speech engine busy
#define sv_ERR_EMBCMD           (sv_ERR_BASE + 15)    // embedded command error
#define sv_ERR_CANTABORT        (sv_ERR_BASE + 16)    // can't override
#define sv_ERR_NOHWND           (sv_ERR_BASE + 17)    // can't create window
#define sv_ERR_UDCANTOPEN       (sv_ERR_BASE + 18)    // can't open user dictionary
#define sv_ERR_UDEOF            (sv_ERR_BASE + 19)    // EOF on user dictionary
#define sv_ERR_UDFORMAT         (sv_ERR_BASE + 20)    // user dictionary format error
#define sv_ERR_EXPIRED          (sv_ERR_BASE + 21)    // evaluation period has expired (demo versions only)
#define sv_ERR_NOLANG           (sv_ERR_BASE + 22)    // language not supported
#define sv_ERR_NOTIMERS         (sv_ERR_BASE + 23)    // can't allocate timer services
#define sv_ERR_INVALIDADDR      (sv_ERR_BASE + 24)    // invalid address
#define sv_ERR_UNREGISTERED     (sv_ERR_BASE + 25)    // unregistered version
#define sv_ERR_NOTIMPLEMENTED   (sv_ERR_BASE + 26)    // command not implemented
#define sv_ERR_NOUSERVOICE      (sv_ERR_BASE + 27)    // user voice not found
#define sv_ERR_WAVFILEOPEN      (sv_ERR_BASE + 28)    // can't open .WAV file
#define sv_ERR_WAVFILEWRITE     (sv_ERR_BASE + 29)    // .WAV file write error
#define sv_ERR_PHONETICSONLY    (sv_ERR_BASE + 30)    // phonetics-only version
#define sv_ERR_NOTHREAD         (sv_ERR_BASE + 31)    // cannot create thread
#define sv_ERR_INTERNAL         (sv_ERR_BASE + 500)   // internal errors

#define sv_ERR_MAXERRORLENGTH   80                    // maximum error message length
// Vocal tract length selectors.
#define sv_VTRACT_MALE      0  // male
#define sv_VTRACT_FEMALE    1  // female
#define sv_VTRACT_CHILD     2  // child's vocal tract
#define sv_VTRACT_BIGMALE   3  // large male vocal tract
// Pitch contour (F0) modes.
// sv_F0_STYLE1 is an obsolete name for sv_F0_NATURAL; both are 0.
#define sv_F0_NATURAL    0  // natural pitch contour
#define sv_F0_STYLE1     0  // natural contour (obsolete #define)
#define sv_F0_RESERVED   1  // reserved for later use
#define sv_F0_MONOTONE   2  // flat pitch contour
#define sv_F0_SING       3  // singing mode
#define sv_F0_RANDOM     4  // major-scale random notes per syllable
// Voicing modes.
#define sv_VMODE_NORMAL      0  // normal voicing
#define sv_VMODE_BREATHY     1  // mellower, with breath noise
#define sv_VMODE_WHISPERED   2  // no voicing, all aspiration
// Glottal source records.
#define sv_GLOT_DEFAULT    0  // use gender to select the source
#define sv_GLOT_MALE       1  // adult male
#define sv_GLOT_FEMALE     2  // adult female
#define sv_GLOT_CHILD      3  // young child or high female
#define sv_GLOT_HIGH       4  // for very high voices, > 300 Hz
#define sv_GLOT_MELLOW     5  // mellow adult male voice
#define sv_GLOT_IMPULSE    6  // for lowish voices
#define sv_GLOT_ODD        7  // odd harmonics only
#define sv_GLOT_COLOSSUS   8  // Colossus computer voice
// Predefined vocal personalities.
#define sv_PERS_MALE          0    // default male voice
#define sv_PERS_FEMALE        1    // default female voice
#define sv_PERS_LARGEMALE     2    // default large male voice
#define sv_PERS_CHILD         3    // default child voice
#define sv_PERS_GIANTMALE     4    // low-pitched male
#define sv_PERS_MELLOWFEM     5    // sexy female
#define sv_PERS_MELLOWMALE    6    // mellow male
#define sv_PERS_CRISPMALE     7    // crisp, clear male
#define sv_PERS_THEFLY        8    // high-pitched, quavering voice
#define sv_PERS_ROBOTOID      9    // robotoid
#define sv_PERS_MARTIAN       10   // alien
#define sv_PERS_COLOSSUS      11   // the voice of the Colossus computer
#define sv_PERS_FASTFRED      12   // fast-talking voice
#define sv_PERS_OLDWOMAN      13   // elderly woman
#define sv_PERS_MUNCHKIN      14   // dwarf
#define sv_PERS_TROLL         15   // a little larger than a dwarf
#define sv_PERS_NERD          16   // nerdish male
#define sv_PERS_MILKTOAST     17   // whiney male
#define sv_PERS_TIPSY         18   // drunken male
#define sv_PERS_CHOIRBOY      19   // high-pitched, sung voice
#define sv_PERS_ACTIVEVOICE   255  // the currently active voice

// Older names retained for backwards compatibility.
#define sv_PERS_RICHLOWMALE   sv_PERS_GIANTMALE
#define sv_PERS_SEXYFEM       sv_PERS_MELLOWFEM
// Miscellaneous SoftVoice limits.
#define sv_MINPITCH          10    // minimum allowable pitch
#define sv_MAXPITCH          2000  // maximum allowable pitch
#define sv_MINRATE           20    // minimum speaking rate
#define sv_MAXRATE           500   // maximum speaking rate
#define sv_MAXVOICENAMELEN   32    // maximum length of the voice name string
// Voice info structure: holds every parameter that defines a vocal
// personality. Byte-packed (pack 1) when WIN32 is defined.
// Field order and types must not change.
#ifdef WIN32
#pragma pack(push,1)
#endif

typedef struct svtts_voiceinfo_tag {
    PSTR  lpszVoiceName;            // pointer to voice name (documented as a far ptr in the original API)
    UWORD nPitch;                   // baseline pitch in Hz
    UWORD nRate;                    // speaking rate in words/minute
    UWORD nGender;                  // gender (vocal tract length)
    UWORD nGlottalSource;           // glottal source
    UWORD nVoicingMode;             // voicing mode (normal, breathy, etc.)
    short swTLBias;                 // tone (brightness) control bias

    // Pitch (F0) behaviour.
    UWORD nF0Style;                 // pitch contour (e.g. natural, monotone)
    UWORD nF0Range;                 // pitch excursion range
    UWORD nF0Perturbation;          // low-frequency quasi-random pitch jitter
    UWORD nF0Hoarse;                // hoarseness control % (default: 0%)
    UWORD nF0UpSmooth;              // F0 up-time smoothing constant
    UWORD nF0DownSmooth;            // F0 down-time smoothing constant
    UWORD nF0DiploAmt;              // F0 diplophonic pulsing amount
    UWORD nF0DiploMax;              // maximum diplophonic percentage
    UWORD nF0DiploThresh;           // pitch below which diplophonia begins

    // Articulation and amplitude biases.
    UWORD nVowelFactor;             // vowel length control (default: 100%)
    short swArticulationEffort;     // articulation effort bias (default: 0)
    short swArticulationSpeed;      // articulation speed bias (default: 0)
    short swAVBias;                 // voicing amplitude bias (default: 0 dB)
    short swAHBias;                 // aspiration amplitude bias (default: 0 dB)
    short swAFBias;                 // frication amplitude bias (default: 0 dB)
    short swAVSpeed;                // voicing amplitude speed bias (default: 0)
    short swAHSpeed;                // aspiration amplitude speed bias (default: 0)
    short swAFSpeed;                // frication amplitude speed bias (default: 0)

    // Formant shaping.
    short swF1Bias;                 // 1st formant frequency shift % (default: 100%)
    short swF2Bias;                 // 2nd formant frequency shift % (default: 100%)
    short swF3Bias;                 // 3rd formant frequency shift % (default: 100%)
    short swB1Bias;                 // 1st formant bandwidth shift % (default: 100%)
    short swB2Bias;                 // 2nd formant bandwidth shift % (default: 100%)
    short swB3Bias;                 // 3rd formant bandwidth shift % (default: 100%)
    short swF4Freq;                 // fourth formant frequency
    short swF4BW;                   // fourth formant bandwidth
    short swF5Freq;                 // fifth formant frequency
    short swF5BW;                   // fifth formant bandwidth

    short swOctave;                 // octave bias
    char  szVoiceName[sv_MAXVOICENAMELEN];  // char array for the voice name
} SVTTSVOICEINFO;

typedef short PSVTTSVOICEINFO;      // integer-typed stand-in for a pointer to SVTTSVOICEINFO

#ifdef WIN32
#pragma pack(pop)
#endif
// Speech handle type: the HSPEECH parameter used in most API calls.
typedef short HSPEECH;
typedef short PHSPEECH;     // presumably a 16-bit near pointer to an HSPEECH (P-prefix convention)

// Return code type of the SoftVoice API.
typedef DWORD SVAPIRET;
// Event info structure. A far pointer to this structure is placed in the
// lParam field of the callback messages sent to the application.
// Note: events are not supported on the X40 platform.
typedef struct svtts_eventinfo_tag {
    HSPEECH hSpeech;            // speech handle
    DWORD   dwTime;             // relative time of the event
    UWORD   nEvent;             // event
    UWORD   uwEventData;        // event-specific UWORD
    ULONG   ulEventData;        // event-specific ULONG
    PSTR    lpStartOfInput;     // pointer to the input text (documented as a far ptr)
    PSTR    lpStartOfSentence;  // pointer to the sentence start (documented as a far ptr)
    PSTR    lpStartOfWord;      // pointer to the word start (documented as a far ptr)
    PSTR    lpPhoneme;          // pointer to the phoneme (documented as a far ptr)
} SVTTSEVENTINFO;

typedef short PSVTTSEVENTINFO;  // integer-typed stand-in for a pointer to SVTTSEVENTINFO
// SoftVoice version info structure. The SVGetVersionInfo entry in the API
// reference document has the details. Note in particular that, after a call
// to SVGetVersionInfo, this structure holds the length of the voice info
// structure.
typedef struct svtts_versioninfo_tag {
    ULONG ulVersion;            // major/minor version number
    PSTR  lpszRegistration;     // licensee registration info
    PSTR  lpszReleaseDate;      // version release date
    UWORD cbVoiceInfoLength;    // length of the voice info struct
} SVTTSVERSIONINFO;

typedef short PSVTTSVERSIONINFO;  // integer-typed stand-in for a pointer to SVTTSVERSIONINFO
// X40-specific status flags.
#define sv_STATUS_IDLE         0x0000
#define sv_STATUS_BACKGROUND   0x0001
#define sv_STATUS_SYNTH        0x0002

// Reserved.
// TTS control commands and status bits for the shared memory segment.
// Note: not supported on the X40 platform.
#define sv_CMD_RUN             0x00000001
#define sv_CMD_PAUSE           0x00000002
#define sv_CMD_ABORT           0x00000004
#define sv_CMD_EXIT            0x00000008

#define sv_STATUS_RUNNING      0x00000001
#define sv_STATUS_PAUSED       0x00000002
#define sv_STATUS_ABORTING     0x00000004
#define sv_STATUS_EXITING      0x00000008
#define sv_STATUS_CALLBACK     0x00000010
// Shared memory segment layout.
// NOTE(review): the fields presumably carry the sv_CMD_* / sv_STATUS_* bits
// defined in this header - confirm against the API reference.
typedef struct svtts_sharedmem_tag {
    unsigned long ulSVCommand;
    unsigned long ulSVStatus;
    int (*pfCallback)(PVOID pPCMBuffer, int cbPCMBuffer);   // callback taking a PCM buffer and its length
    unsigned long ulReserved[20];
} SVSHAREDMEM;

typedef short PSVSHAREDMEM;     // integer-typed stand-in for a pointer to SVSHAREDMEM
typedef int (*SVTTS_PFNCALLBACK)(PUBYTE , ULONG); // for callback function
#endif