// www.pudn.com > cspeech.zip > cspeech.cpp
///////////////////////////////////////////////////////////////////////////// // Name: cspeech.cpp // Purpose: Text to speech class // Author: Julian Smart // Modified by: // Created: 07/02/98 // RCS-ID: $Id$ // Copyright: (c) Julian Smart // Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// #include "cspeech.h" #include "cspeechp.h" #includeCSpeech::CSpeech() { m_speechImpl = new CSpeechImpl(this); } CSpeech::~CSpeech() { delete m_speechImpl; // This can cause crashes, so don't do it from the destructor. // Terminate(); } BOOL CSpeech::Init() { if (!m_speechImpl->InitTTS()) return FALSE; if (EnumerateModes()) { // Set the mode to the first one found. SetMode(0); } else { // CSpeechError("You don't have any text-to-speech engines installed."); return FALSE; } // m_speechImpl->m_pITTSCentral->Register((void*)m_speechImpl->m_pTestNotify, IID_ITTSNotifySink, // &m_speechImpl->m_dwRegKey); return TRUE; } BOOL CSpeech::Terminate() { return m_speechImpl->TerminateTTS(); } BOOL CSpeech::SetMode(int mode) { HRESULT hRes; #ifdef __MFC__ SetCursor(AfxGetApp()->LoadStandardCursor(IDC_WAIT)); #endif if(m_speechImpl->m_pITTSAttributes) { m_speechImpl->m_pITTSAttributes->Release(); m_speechImpl->m_pITTSAttributes= NULL; } if(m_speechImpl->m_pITTSDialogs) { m_speechImpl->m_pITTSDialogs->Release(); m_speechImpl->m_pITTSDialogs= NULL; } if(m_speechImpl->m_pITTSCentral) { m_speechImpl->m_pITTSCentral->UnRegister(m_speechImpl->m_dwRegKey); m_speechImpl->m_pITTSCentral->Release(); m_speechImpl->m_pITTSCentral= NULL; } #ifdef DIRECTSOUND if (m_speechImpl->m_pIAD) { while (m_speechImpl->m_pIAD->Release()); m_speechImpl->m_pIAD= NULL; } #else // DIRECTSOUND if (m_speechImpl->m_pIMMD) { // some engines will leak an audio destination object // but if release they crash //while ( // m_speechImpl->m_pIMMD->Release(); // ); m_speechImpl->m_pIMMD= NULL; } #endif #ifdef DIRECTSOUND hRes = CoCreateInstance (CLSID_AudioDestDirect, NULL, 
CLSCTX_ALL, IID_IAudioDirect, (void**)&(m_speechImpl->m_pIAD)); if (FAILED(hRes)) { CSpeechMessage("Can't find CLSID_AudioDestDirect"); return FALSE; } // crreate direct sound stuff LPDIRECTSOUND lpDirectSound; hRes = CoCreateInstance (CLSID_DirectSound, NULL, CLSCTX_ALL, IID_IDirectSound, (LPVOID*) &lpDirectSound); if (hRes) { CSpeechMessage( "Can't find IID_IDirectSound" ); return FALSE; } hRes = lpDirectSound->Initialize(NULL); if (hRes) { CSpeechMessage( "Can't initialize DirectSound" ); return FALSE; } #ifdef __MFC__ hRes = lpDirectSound->SetCooperativeLevel (AfxGetApp()->m_pMainWnd, DSSCL_NORMAL); #endif // tell the audio object about our stuff m_speechImpl->m_pIAD->Init ((PVOID) lpDirectSound,IID_IDirectSound); m_speechImpl->m_pIAD->AddRef(); m_speechImpl->m_pITTSEnum->Select(m_speechImpl->m_GUIDModes[mode], &m_speechImpl->m_pITTSCentral, m_speechImpl->m_pIAD); #else // DIRECTSOUND hRes = CoCreateInstance (CLSID_MMAudioDest, NULL, CLSCTX_ALL, IID_IAudioMultiMediaDevice, (void**)&(m_speechImpl->m_pIMMD)); if (FAILED(hRes)) { CSpeechMessage( "Error creating AudioDest Object(CoCreateInstance)." 
); } hRes = m_speechImpl->m_pIMMD->DeviceNumSet( 0XFFFFFFFF); // some engines will leak an audio destination object // but if release they crash // m_pIMMD->AddRef(); m_speechImpl->m_pITTSEnum->Select(m_speechImpl->m_GUIDModes[mode], &m_speechImpl->m_pITTSCentral, m_speechImpl->m_pIMMD); #endif // DIRECTSOUND m_speechImpl->m_pITTSCentral->QueryInterface (IID_ITTSAttributes, (void**)&m_speechImpl->m_pITTSAttributes); m_speechImpl->m_pITTSCentral->QueryInterface (IID_ITTSDialogs, (void**)&m_speechImpl->m_pITTSDialogs); m_speechImpl->m_pITTSCentral->Register((void*)m_speechImpl->m_pTestNotify, IID_ITTSNotifySink, &m_speechImpl->m_dwRegKey); return TRUE; } BOOL CSpeech::Pause(BOOL pause) { if (pause) { return (m_speechImpl->m_pITTSCentral->AudioPause() == NOERROR); } else { return (m_speechImpl->m_pITTSCentral->AudioResume() == NOERROR); } } BOOL CSpeech::EnumerateModes(void) { HRESULT hRes; PITTSENUM pClone1; TTSMODEINFO TTSModeInfo; DWORD dwNumTimes; int index = 0; hRes = m_speechImpl->m_pITTSEnum->Clone(&pClone1); if( FAILED(hRes) ) { CSpeechMessage ( "Couldn't clone ITTSEnum state, aborting enumeration test" ); return 0; } hRes = pClone1->Next (1, &TTSModeInfo, &dwNumTimes); if( dwNumTimes == 0 ) return FALSE; while (dwNumTimes) { m_speechImpl->m_modeFeatures[index] = TTSModeInfo.dwFeatures ; m_speechImpl->m_modeNames[index] = TTSModeInfo.szModeName ; m_speechImpl->m_GUIDModes[index] = TTSModeInfo.gModeID; // if (TTSModeInfo.dwFeatures & TTSFEATURE_ANYWORD) hRes = pClone1->Next (1, &TTSModeInfo, &dwNumTimes); index ++; } pClone1->Release(); pClone1=NULL; return index ? 
TRUE : FALSE; } int CSpeech::FindMode(const CString modeName) { int i; for (i = 0; i < GetModeCount() ; i++) { if (m_speechImpl->m_modeNames[i] == modeName) return i; } return -1; } // Set the pitch BOOL CSpeech::SetPitch(int pitch) { return (m_speechImpl->m_pITTSAttributes->PitchSet(pitch) == NOERROR); } // Get the pitch int CSpeech::GetPitch(void) const { WORD pitch = 0; if (m_speechImpl->m_pITTSAttributes->PitchGet(& pitch) == NOERROR) return pitch; else return -1; } // Get the min pitch int CSpeech::GetMinPitch(void) const { CSpeech *This = (CSpeech*) this; int currPitch = This->GetPitch(); This->SetPitch(TTSATTR_MINPITCH); int minPitch = This->GetPitch(); This->SetPitch(currPitch); return minPitch; } // Get the max pitch int CSpeech::GetMaxPitch(void) const { CSpeech *This = (CSpeech*) this; int currPitch = This->GetPitch(); This->SetPitch(TTSATTR_MAXPITCH); int maxPitch = This->GetPitch(); This->SetPitch(currPitch); return maxPitch; } // Set the speed BOOL CSpeech::SetSpeed(long speed) { return (m_speechImpl->m_pITTSAttributes->SpeedSet(speed) == NOERROR); } // Get the speed long CSpeech::GetSpeed(void) const { DWORD speed = 0; if (m_speechImpl->m_pITTSAttributes->SpeedGet(& speed) == NOERROR) return speed; else return -1; } // Get the min speed long CSpeech::GetMinSpeed(void) const { CSpeech *This = (CSpeech*) this; long currSpeed = This->GetSpeed(); This->SetSpeed(TTSATTR_MINSPEED); long minSpeed = This->GetSpeed(); This->SetSpeed(currSpeed); return minSpeed; } // Get the max speed long CSpeech::GetMaxSpeed(void) const { CSpeech *This = (CSpeech*) this; long currSpeed = This->GetSpeed(); This->SetSpeed(TTSATTR_MAXSPEED); long maxSpeed = This->GetSpeed(); This->SetSpeed(currSpeed); return maxSpeed; } // Set the volume BOOL CSpeech::SetVolume(long volume) { return (m_speechImpl->m_pITTSAttributes->VolumeSet(volume) == NOERROR); } // Get the volume long CSpeech::GetVolume(void) const { DWORD vol = 0; if (m_speechImpl->m_pITTSAttributes->VolumeGet(& vol) == 
NOERROR) return vol; else return -1; } // Get the min volume long CSpeech::GetMinVolume(void) const { return 0; } // Get the max volume long CSpeech::GetMaxVolume(void) const { return 65535; } BOOL CSpeech::Say(const CString& textStr, BOOL tagged) { SDATA text; text.dwSize = strlen(textStr) + 1; text.pData = (CHAR *) malloc( text.dwSize); strcpy((CHAR*)text.pData, textStr); HRESULT res = m_speechImpl->m_pITTSCentral->TextData(CHARSET_TEXT, tagged, text, NULL, IID_ITTSBufNotifySink); free(text.pData); return (res == NOERROR); } BOOL CSpeech::Reset() { return (m_speechImpl->m_pITTSCentral->AudioReset() == NOERROR); } BOOL CSpeech::Default() { return (m_speechImpl->m_pITTSCentral->Inject("\\rst\\") == NOERROR); } BOOL CSpeech::Inject(const CString& text) { return (m_speechImpl->m_pITTSCentral->Inject(text) == NOERROR); } // Get the mode name by index CString CSpeech::GetModeName(int mode) const { return m_speechImpl->m_modeNames[mode] ; } // Get the mode features by index long CSpeech::GetModeFeatures(int mode) const { return m_speechImpl->m_modeFeatures[mode] ; } // Get the number of modes int CSpeech::GetModeCount(void) const { return m_speechImpl->m_noModes; } // Overridables // ITTSNotifySink BOOL CSpeech::OnAttribChanged(long attribId) { return TRUE; } BOOL CSpeech::OnAudioStart(timestamp_t timeStamp) { return TRUE; } BOOL CSpeech::OnAudioStop(timestamp_t timeStamp) { return TRUE; } BOOL CSpeech::OnVisual(timestamp_t timeStamp, char cIPAPhoneme, char cEnginePhoneme, long dwHints, const CSpeechMouth& mouth) { return TRUE; } // ITTSBufNotifySink BOOL CSpeech::OnBookMark(timestamp_t qTimeStamp, long dwMarkNum) { return TRUE; } BOOL CSpeech::OnTextDataStarted (timestamp_t qTimeStamp) { return TRUE; } BOOL CSpeech::OnTextDataDone (timestamp_t qTimeStamp, long dwFlags) { return TRUE; } BOOL CSpeech::OnWordPosition (timestamp_t qTimeStamp, long dwByteOffset) { return TRUE; } // Shows an engine-specific About dialog. 
// Returns FALSE if not supported or there is some other error. BOOL CSpeech::AboutDialog(window_t parentWindow, const CString& title) { // I don't know how to convert to PWSTR (Unicode) so // pass NULL for now. return (m_speechImpl->m_pITTSDialogs->AboutDlg((HWND) parentWindow, NULL) == NOERROR); } // Shows an engine-specific general settings dialog. // Returns FALSE if not supported or there is some other error. BOOL CSpeech::GeneralDialog(window_t parentWindow, const CString& title) { // I don't know how to convert to PWSTR (Unicode) so // pass NULL for now. return (m_speechImpl->m_pITTSDialogs->GeneralDlg((HWND) parentWindow, NULL) == NOERROR); } // Shows an engine-specific lexicon dialog. // Returns FALSE if not supported or there is some other error. BOOL CSpeech::LexiconDialog(window_t parentWindow, const CString& title) { // I don't know how to convert to PWSTR (Unicode) so // pass NULL for now. return (m_speechImpl->m_pITTSDialogs->LexiconDlg((HWND) parentWindow, NULL) == NOERROR); } // Shows an engine-specific translation dialog. // Returns FALSE if not supported or there is some other error. BOOL CSpeech::TranslateDialog(window_t parentWindow, const CString& title) { // I don't know how to convert to PWSTR (Unicode) so // pass NULL for now. 
return (m_speechImpl->m_pITTSDialogs->TranslateDlg((HWND) parentWindow, NULL) == NOERROR); } // CSpeechImpl CSpeechImpl::CSpeechImpl(CSpeech* speech) { m_pITTSEnum = 0; m_pITTSCentral = 0; m_pITTSAttributes = 0; m_pITTSDialogs = 0; m_pTestNotify = 0; m_pTestBufNotify = 0; #ifdef DIRECTSOUND m_pIAD = 0; #else m_pIMMD; #endif m_dwRegKey = 0; m_noModes = 0; m_speech = speech; } CSpeechImpl::~CSpeechImpl() { } BOOL CSpeechImpl::InitTTS(void) { HRESULT hRes; m_pITTSCentral = NULL; m_pITTSEnum = NULL; m_pITTSAttributes = NULL; m_pITTSDialogs = NULL; m_dwRegKey = 0xFFFFFFFF; m_pTestNotify = NULL; m_pTestBufNotify = NULL; #ifdef DIRECTSOUND m_pIAD = NULL; #else m_pIMMD = NULL; #endif hRes = CoCreateInstance (CLSID_TTSEnumerator, NULL, CLSCTX_ALL, IID_ITTSEnum, (void**)&m_pITTSEnum); if (FAILED(hRes)) { CSpeechMessage( "Error creating TTSEnumerator (CoCreateInstance)." ); return FALSE; } if( (m_pTestNotify = new CTestNotify(m_speech)) == NULL ) CSpeechMessage( "Error creating notify pointer." ); if( (m_pTestBufNotify = new CTestBufNotify(m_speech)) == NULL ) CSpeechMessage( "Error creating buf notify pointer." 
); return TRUE; } BOOL CSpeechImpl::TerminateTTS(void) { if ( m_pITTSEnum ) { m_pITTSEnum->Release(); m_pITTSEnum = NULL; } if ( m_pITTSAttributes ) { m_pITTSAttributes->Release(); m_pITTSAttributes = NULL; } if ( m_pITTSDialogs ) { m_pITTSDialogs->Release(); m_pITTSDialogs = NULL; } if ( m_pITTSCentral ) { m_pITTSCentral->UnRegister(m_dwRegKey); m_pITTSCentral->Release(); m_pITTSCentral = NULL; } #ifdef DIRECTSOUND if ( m_pIAD ) { while (m_pIAD->Release()); m_pIAD = NULL; } #else if ( m_pIMMD ) { // some engines will leak an audio destination object // but if release they crash //while ( // m_pIMMD->Release(); // ); } #endif if (m_pTestNotify ) { delete(m_pTestNotify); m_pTestNotify = NULL; } if (m_pTestBufNotify) { delete(m_pTestBufNotify); m_pTestBufNotify = NULL; } return TRUE; } /************************************************************************* CTestNotify - Notification object. */ CTestNotify::CTestNotify (CSpeech *pCSpeech) { m_speech = pCSpeech; } CTestNotify::~CTestNotify (void) { // this space intentionally left blank } STDMETHODIMP CTestNotify::QueryInterface (REFIID riid, LPVOID *ppv) { *ppv = NULL; /* always return our IUnknown for IID_IUnknown */ if (IsEqualIID (riid, IID_IUnknown) || IsEqualIID(riid,IID_ITTSNotifySink)) { *ppv = (LPVOID) this; return S_OK; } // otherwise, cant find return ResultFromScode (E_NOINTERFACE); } STDMETHODIMP_ (ULONG) CTestNotify::AddRef (void) { // normally this increases a reference count, but this object // is going to be freed as soon as the app is freed, so it doesn't // matter return 1; } STDMETHODIMP_(ULONG) CTestNotify::Release (void) { // normally this releases a reference count, but this object // is going to be freed when the application is freed so it doesnt // matter return 1; } STDMETHODIMP CTestNotify::AttribChanged (DWORD dwAttribID) { if (m_speech->OnAttribChanged(dwAttribID)) return NOERROR; else return E_FAIL; } STDMETHODIMP CTestNotify::AudioStart (QWORD qTimeStamp) { if 
(m_speech->OnAudioStart(qTimeStamp)) return NOERROR; else return E_FAIL; } STDMETHODIMP CTestNotify::AudioStop (QWORD qTimeStamp) { if (m_speech->OnAudioStop(qTimeStamp)) return NOERROR; else return E_FAIL; } STDMETHODIMP CTestNotify::Visual (QWORD qTimeStamp, CHAR cIPAPhoneme, CHAR cEnginePhoneme, DWORD dwHints, PTTSMOUTH pTTSMouth) { CSpeechMouth mouth; mouth.m_mouthHeight = pTTSMouth->bMouthHeight; mouth.m_mouthWidth = pTTSMouth->bMouthWidth; mouth.m_mouthUpturn = pTTSMouth->bMouthUpturn; mouth.m_jawOpen = pTTSMouth->bJawOpen; mouth.m_teethUpperVisible = pTTSMouth->bTeethUpperVisible; mouth.m_teethLowerVisible = pTTSMouth->bTeethLowerVisible; mouth.m_tonguePosn = pTTSMouth->bTonguePosn; mouth.m_lipTension = pTTSMouth->bLipTension; if (m_speech->OnVisual(qTimeStamp, cIPAPhoneme, cEnginePhoneme, dwHints, mouth)) return NOERROR; else return E_FAIL; #if 0 CDC* pdc = m_pCSpeech->GetDC(); CRect r,r1; POINT pPoints[6]; m_pCSpeech->GetClientRect(&r); CBrush bkBrush(COLORREF(pdc->GetBkColor())); CBrush redBrush(COLORREF(RGB(0xff,0x00,0x00))); //a solid red brush CBrush whiteBrush(COLORREF(RGB(0xff,0xff,0xff))); //a solid white brush CBrush blackBrush(COLORREF(RGB(0x00,0x00,0x00))); //a solid black brush CBrush *oldBrush = pdc->SelectObject(&bkBrush); int nTeethUpperHeight, nTeethLowerHeight, nMouthHeight; int nCenterX, nCenterY, nLeft, nLeft2, nRight, nRight2; static BOOL fKnowMouth = FALSE; if (!fKnowMouth) { fKnowMouth = TRUE; // remember the mouth position HWND hWndMouth; hWndMouth = ::GetDlgItem (ghWndMain, IDC_MOUTHBOX); ::GetWindowRect(hWndMouth, &gRectMouth); POINT pDlg; pDlg.x = gRectMouth.left; pDlg.y = gRectMouth.top; ::ScreenToClient(ghWndMain, &pDlg); gRectMouth.left = pDlg.x; gRectMouth.top = pDlg.y; pDlg.x = gRectMouth.right; pDlg.y = gRectMouth.bottom; ::ScreenToClient(ghWndMain, &pDlg); gRectMouth.right = pDlg.x; gRectMouth.bottom = pDlg.y; } r1 = gRectMouth; pdc->FillRect(r1,oldBrush); nCenterX = (gRectMouth.left + gRectMouth.right) / 2; nCenterY = 
(gRectMouth.top * 2 + gRectMouth.bottom) / 3; pdc->SelectObject(redBrush); if (pTTSMouth) { nLeft2 = (LONG) (nCenterX - pTTSMouth->bMouthWidth * 20.0f / 256.0f); nLeft = (LONG) (nCenterX - pTTSMouth->bMouthWidth * 40.0f / 256.0f); nRight2 = (LONG) (nCenterX + pTTSMouth->bMouthWidth * 20.0f / 256.0f); nRight = (LONG) (nCenterX + pTTSMouth->bMouthWidth * 40.0f / 256.0f); nTeethUpperHeight = (LONG) (pTTSMouth->bTeethUpperVisible * 8.0f / 256.0f); nTeethLowerHeight = (LONG) (pTTSMouth->bTeethLowerVisible * 8.0f / 256.0f); nMouthHeight = (LONG) (pTTSMouth->bMouthHeight * 80.0f / 256.0f); pPoints[0].x = pPoints[2].x = nLeft2; pPoints[1].x = nLeft; pPoints[3].x = pPoints[5].x = nRight2; pPoints[4].x = nRight; pPoints[0].y = pPoints[5].y = nCenterY - nTeethUpperHeight - 5; pPoints[1].y = pPoints[4].y = nCenterY; pPoints[2].y = pPoints[3].y = nCenterY - nTeethUpperHeight + nMouthHeight + 5; pdc->Polygon(pPoints, 6); pdc->SelectObject(whiteBrush); pPoints[0].y = pPoints[5].y = nCenterY - nTeethUpperHeight; pPoints[1].y = pPoints[4].y = nCenterY; pPoints[2].y = pPoints[3].y = nCenterY - nTeethUpperHeight + nMouthHeight; pdc->Polygon(pPoints, 6); pdc->SelectObject(blackBrush); pPoints[0].y = pPoints[5].y = nCenterY; pPoints[1].y = pPoints[4].y = nCenterY; pPoints[2].y = pPoints[3].y = nCenterY - nTeethUpperHeight + nMouthHeight - nTeethLowerHeight; pdc->Polygon(pPoints, 6); } else { nLeft2 = (LONG) (nCenterX - 10); nLeft = (LONG) (nCenterX - 20); nRight2 = (LONG) (nCenterX + 10); nRight = (LONG) (nCenterX + 20); pPoints[0].x = pPoints[2].x = nLeft2; pPoints[1].x = nLeft; pPoints[3].x = pPoints[5].x = nRight2; pPoints[4].x = nRight; pPoints[0].y = pPoints[5].y = nCenterY - 5; pPoints[1].y = pPoints[4].y = nCenterY; pPoints[2].y = pPoints[3].y = nCenterY + 5; pdc->Polygon(pPoints, 6); } pdc->SelectObject(oldBrush); m_speech->ReleaseDC(pdc); #endif return NOERROR; } /************************************************************************* CTestBufNotify - Notification object. 
*/ CTestBufNotify::CTestBufNotify (CSpeech* speech) { m_speech = speech; } CTestBufNotify::~CTestBufNotify (void) { // this space intentionally left blank } STDMETHODIMP CTestBufNotify::QueryInterface (REFIID riid, LPVOID *ppv) { *ppv = NULL; /* always return our IUnknown for IID_IUnknown */ if (IsEqualIID (riid, IID_IUnknown) || IsEqualIID(riid,IID_ITTSBufNotifySink)) { *ppv = (LPVOID) this; return S_OK; } // otherwise, cant find return ResultFromScode (E_NOINTERFACE); } STDMETHODIMP_ (ULONG) CTestBufNotify::AddRef (void) { // normally this increases a reference count, but this object // is going to be freed as soon as the app is freed, so it doesn't // matter return 1; } STDMETHODIMP_(ULONG) CTestBufNotify::Release (void) { // normally this releases a reference count, but this object // is going to be freed when the application is freed so it doesnt // matter return 1; } STDMETHODIMP CTestBufNotify::BookMark (QWORD qTimeStamp, DWORD dwMarkNum) { if (m_speech->OnBookMark(qTimeStamp, dwMarkNum)) return NOERROR; else return E_FAIL; } STDMETHODIMP CTestBufNotify::TextDataDone (QWORD qTimeStamp, DWORD dwFlags) { if (m_speech->OnTextDataDone(qTimeStamp, dwFlags)) return NOERROR; else return E_FAIL; } STDMETHODIMP CTestBufNotify::TextDataStarted (QWORD qTimeStamp) { if (m_speech->OnTextDataStarted(qTimeStamp)) return NOERROR; else return E_FAIL; } STDMETHODIMP CTestBufNotify::WordPosition (QWORD qTimeStamp, DWORD dwByteOffset) { if (m_speech->OnWordPosition(qTimeStamp, dwByteOffset)) return NOERROR; else return E_FAIL; } void CSpeechMessage(const CString& msg, const CString& label) { MessageBox (NULL, TEXT(msg), TEXT(label), MB_OK ); }