diff options
-rw-r--r-- | include/tts/TtsEngine.h | 39 | ||||
-rw-r--r--[-rwxr-xr-x] | tts/jni/android_tts_SynthProxy.cpp | 39 |
2 files changed, 52 insertions, 26 deletions
diff --git a/include/tts/TtsEngine.h b/include/tts/TtsEngine.h index bf62995..e0220ea 100644 --- a/include/tts/TtsEngine.h +++ b/include/tts/TtsEngine.h @@ -25,24 +25,29 @@ namespace android { +enum tts_synth_status { + TTS_SYNTH_DONE = 0, + TTS_SYNTH_PENDING = 1 +}; + +enum tts_callback_status { + TTS_CALLBACK_HALT = 0, + TTS_CALLBACK_CONTINUE = 1 +}; + // The callback is used by the implementation of this interface to notify its // client, the Android TTS service, that the last requested synthesis has been -// completed. +// completed. // TODO reword // The callback for synthesis completed takes: -// void * - The userdata pointer set in the original synth call -// uint32_t - Track sampling rate in Hz -// audio_format - The AudioSystem::audio_format enum -// int - The number of channels -// int8_t * - A buffer of audio data only valid during the execution of the callback -// size_t - The size of the buffer -// Note about memory management: -// The implementation of TtsEngine is responsible for the management of the memory -// it allocates to store the synthesized speech. After the execution of the callback -// to hand the synthesized data to the client of TtsEngine, the TTS engine is -// free to reuse or free the previously allocated memory. -// This implies that the implementation of the "synthDoneCB" callback cannot use -// the pointer to the buffer of audio samples outside of the callback itself. -typedef void (synthDoneCB_t)(void *, uint32_t, AudioSystem::audio_format, int, int8_t *, size_t); +// [inout] void *& - The userdata pointer set in the original synth call +// [in] uint32_t - Track sampling rate in Hz +// [in] audio_format - The AudioSystem::audio_format enum +// [in] int - The number of channels +// [inout] int8_t *& - A buffer of audio data only valid during the execution of the callback +// [inout] size_t & - The size of the buffer +// [in] tts_synth_status - Status of the synthesis; 0 for done, 1 for more data to be synthesized. +// Returns the status of the consumer of the synthesis. 0 for stop, 1 for continue. +typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t, AudioSystem::audio_format, int, int8_t *&, size_t&, tts_synth_status); class TtsEngine; extern "C" TtsEngine* getTtsEngine(); @@ -155,13 +160,13 @@ public: // @param text the UTF-8 text to synthesize // @param userdata pointer to be returned when the call is invoked // @return TTS_SUCCESS or TTS_FAILURE - virtual tts_result synthesizeText(const char *text, void *userdata); + virtual tts_result synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata); // Synthesize IPA text. When synthesis completes, the engine must call the given callback to notify the TTS API. // @param ipa the IPA data to synthesize // @param userdata pointer to be returned when the call is invoked // @return TTS_FEATURE_UNSUPPORTED if IPA is not supported, otherwise TTS_SUCCESS or TTS_FAILURE - virtual tts_result synthesizeIpa(const char *ipa, void *userdata); + virtual tts_result synthesizeIpa(const char *ipa, int8_t *buffer, size_t bufferSize, void *userdata); }; } // namespace android diff --git a/tts/jni/android_tts_SynthProxy.cpp b/tts/jni/android_tts_SynthProxy.cpp index d8f1bf3..582e6219 100755..100644 --- a/tts/jni/android_tts_SynthProxy.cpp +++ b/tts/jni/android_tts_SynthProxy.cpp @@ -32,6 +32,7 @@ #define DEFAULT_TTS_RATE 16000 #define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT #define DEFAULT_TTS_NB_CHANNELS 1 +#define DEFAULT_TTS_BUFFERSIZE 1024 #define USAGEMODE_PLAY_IMMEDIATELY 0 #define USAGEMODE_WRITE_TO_FILE 1 @@ -64,6 +65,8 @@ class SynthProxyJniStorage { uint32_t mSampleRate; AudioSystem::audio_format mAudFormat; int mNbChannels; + int8_t * mBuffer; + size_t mBufferSize; SynthProxyJniStorage() { //tts_class = NULL; @@ -73,6 +76,8 @@ class SynthProxyJniStorage { mSampleRate = DEFAULT_TTS_RATE; mAudFormat = DEFAULT_TTS_FORMAT; mNbChannels = DEFAULT_TTS_NB_CHANNELS; + mBufferSize = DEFAULT_TTS_BUFFERSIZE; + mBuffer = new int8_t[mBufferSize]; } ~SynthProxyJniStorage() { @@ -81,6 +86,7 @@ class SynthProxyJniStorage { mNativeSynthInterface->shutdown(); mNativeSynthInterface = NULL; } + delete mBuffer; } void killAudio() { @@ -159,23 +165,27 @@ void prepAudioTrack(SynthProxyJniStorage* pJniData, * Callback from TTS engine. * Directly speaks using AudioTrack or write to file */ -static void ttsSynthDoneCB(void * userdata, uint32_t rate, +static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate, AudioSystem::audio_format format, int channel, - int8_t *wav, size_t bufferSize) { + int8_t *&wav, size_t &bufferSize, tts_synth_status status) { LOGI("ttsSynthDoneCallback: %d bytes", bufferSize); + if (userdata == NULL){ + LOGE("userdata == NULL"); + return TTS_CALLBACK_HALT; + } afterSynthData_t* pForAfter = (afterSynthData_t*)userdata; + SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage); if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){ LOGI("Direct speech"); if (wav == NULL) { + delete pForAfter; LOGI("Null: speech has completed"); } if (bufferSize > 0) { - SynthProxyJniStorage* pJniData = - (SynthProxyJniStorage*)(pForAfter->jniStorage); prepAudioTrack(pJniData, rate, format, channel); if (pJniData->mAudioOut) { pJniData->mAudioOut->write(wav, bufferSize); @@ -187,6 +197,7 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate, } else if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) { LOGI("Save to file"); if (wav == NULL) { + delete pForAfter; LOGI("Null: speech has completed"); } if (bufferSize > 0){ @@ -195,10 +206,17 @@ static void ttsSynthDoneCB(void * userdata, uint32_t rate, } // TODO update to call back into the SynthProxy class through the // javaTTSFields.synthProxyMethodPost methode to notify - // playback has completed + // playback has completed if the synthesis is done, i.e. + // if status == TTS_SYNTH_DONE + //delete pForAfter; + + // we don't update the wav (output) parameter as we'll let the next callback + // write at the same location, we've consumed the data already, but we need + // to update bufferSize to let the TTS engine know how much it can write the + // next time it calls this function. + bufferSize = pJniData->mBufferSize; - delete pForAfter; - return; + return TTS_CALLBACK_CONTINUE; } @@ -223,7 +241,9 @@ android_tts_SynthProxy_native_setup(JNIEnv *env, jobject thiz, } else { TtsEngine *(*get_TtsEngine)() = reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine")); + pJniStorage->mNativeSynthInterface = (*get_TtsEngine)(); + if (pJniStorage->mNativeSynthInterface) { pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB); } @@ -323,7 +343,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData, // TODO check return codes if (pSynthData->mNativeSynthInterface) { - pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, + pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter); } @@ -395,7 +415,7 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData, if (pSynthData->mNativeSynthInterface) { const char *textNativeString = env->GetStringUTFChars(textJavaString, 0); - pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, + pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize, (void *)pForAfter); env->ReleaseStringUTFChars(textJavaString, textNativeString); } @@ -442,6 +462,7 @@ static void android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData, int bufferPointer, int bufferSize) { +LOGI("android_tts_SynthProxy_playAudioBuffer"); if (jniData == 0) { LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data"); return; |