diff options
author | Jean-Michel Trivi <jmtrivi@google.com> | 2011-01-05 16:08:21 -0800 |
---|---|---|
committer | Jean-Michel Trivi <jmtrivi@google.com> | 2011-01-05 16:24:30 -0800 |
commit | 9d2d26af2e1111251f5a21213a071eb4fdc1224f (patch) | |
tree | 502090359174aef58fdc840b22f3f423f31c099b | |
parent | 2cdee233125a6cc4b00a2962d5a50273d6bb5410 (diff) | |
download | frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.zip frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.gz frameworks_base-9d2d26af2e1111251f5a21213a071eb4fdc1224f.tar.bz2 |
Add support in TTS for volume and panning control of the synth output
Add two new parameters that are used when synthesizing text and
playing it back directly to control the volume and left-right
panning of the output.
Panning is applied using a balance law: it is not energy-preserving,
but it does not lower the volume when no panning is applied or when
panning to center (legacy behavior).
Reduced the amount of logging, and removed the spoken text from the log output.
In TextToSpeech.java: added convenience method to handle the setting
of the cached synthesis parameters.
Change-Id: I235d3d3193283ccc1891e2065d43787e3f63304d
-rwxr-xr-x | core/java/android/speech/tts/TextToSpeech.java | 111 | ||||
-rw-r--r-- | packages/TtsService/jni/android_tts_SynthProxy.cpp | 54 | ||||
-rwxr-xr-x | packages/TtsService/src/android/tts/SynthProxy.java | 11 | ||||
-rwxr-xr-x | packages/TtsService/src/android/tts/TtsService.java | 17 |
4 files changed, 144 insertions, 49 deletions
diff --git a/core/java/android/speech/tts/TextToSpeech.java b/core/java/android/speech/tts/TextToSpeech.java index 841257f..f010076 100755 --- a/core/java/android/speech/tts/TextToSpeech.java +++ b/core/java/android/speech/tts/TextToSpeech.java @@ -151,6 +151,23 @@ public class TextToSpeech { /** * {@hide} */ + public static final float DEFAULT_VOLUME = 1.0f; + /** + * {@hide} + */ + protected static final String DEFAULT_VOLUME_STRING = "1.0"; + /** + * {@hide} + */ + public static final float DEFAULT_PAN = 0.0f; + /** + * {@hide} + */ + protected static final String DEFAULT_PAN_STRING = "0.0"; + + /** + * {@hide} + */ public static final int USE_DEFAULTS = 0; // false /** * {@hide} @@ -331,6 +348,24 @@ public class TextToSpeech { * @see TextToSpeech#synthesizeToFile(String, HashMap, String) */ public static final String KEY_PARAM_UTTERANCE_ID = "utteranceId"; + /** + * {@hide} + * Parameter key to specify the speech volume relative to the current stream type + * volume used when speaking text. Volume is specified as a float ranging from 0 to 1 + * where 0 is silence, and 1 is the maximum volume. + * @see TextToSpeech#speak(String, int, HashMap) + * @see TextToSpeech#playEarcon(String, int, HashMap) + */ + public static final String KEY_PARAM_VOLUME = "volume"; + /** + * {@hide} + * Parameter key to specify how the speech is panned from left to right when speaking text. + * Pan is specified as a float ranging from -1 to +1 where -1 maps to a hard-left pan, + * 0 to center, and +1 to hard-right. 
+ * @see TextToSpeech#speak(String, int, HashMap) + * @see TextToSpeech#playEarcon(String, int, HashMap) + */ + public static final String KEY_PARAM_PAN = "pan"; // key positions in the array of cached parameters /** @@ -371,7 +406,18 @@ public class TextToSpeech { /** * {@hide} */ - protected static final int NB_CACHED_PARAMS = 8; + protected static final int PARAM_POSITION_VOLUME = 16; + + /** + * {@hide} + */ + protected static final int PARAM_POSITION_PAN = 18; + + + /** + * {@hide} + */ + protected static final int NB_CACHED_PARAMS = 20; } /** @@ -416,6 +462,8 @@ public class TextToSpeech { mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID] = Engine.KEY_PARAM_UTTERANCE_ID; mCachedParams[Engine.PARAM_POSITION_ENGINE] = Engine.KEY_PARAM_ENGINE; mCachedParams[Engine.PARAM_POSITION_PITCH] = Engine.KEY_PARAM_PITCH; + mCachedParams[Engine.PARAM_POSITION_VOLUME] = Engine.KEY_PARAM_VOLUME; + mCachedParams[Engine.PARAM_POSITION_PAN] = Engine.KEY_PARAM_PAN; // Leave all defaults that are shown in Settings uninitialized/at the default // so that the values set in Settings will take effect if the application does @@ -429,6 +477,8 @@ public class TextToSpeech { mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = ""; mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = ""; mCachedParams[Engine.PARAM_POSITION_PITCH + 1] = "100"; + mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING; + mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING; initTts(); } @@ -717,24 +767,18 @@ public class TextToSpeech { { synchronized (mStartLock) { int result = ERROR; - Log.i("TTS received: ", text); + Log.i("TTS", "speak() queueMode=" + queueMode); if (!mStarted) { return result; } try { if ((params != null) && (!params.isEmpty())) { - String extra = params.get(Engine.KEY_PARAM_STREAM); - if (extra != null) { - mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra; - } - extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID); - if (extra != null) { - 
mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra; - } - extra = params.get(Engine.KEY_PARAM_ENGINE); - if (extra != null) { - mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra; - } + setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM); + setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID, + Engine.PARAM_POSITION_UTTERANCE_ID); + setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE); + setCachedParam(params, Engine.KEY_PARAM_VOLUME, Engine.PARAM_POSITION_VOLUME); + setCachedParam(params, Engine.KEY_PARAM_PAN, Engine.PARAM_POSITION_PAN); } result = mITts.speak(mPackageName, text, queueMode, mCachedParams); } catch (RemoteException e) { @@ -791,10 +835,9 @@ public class TextToSpeech { if (extra != null) { mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = extra; } - extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID); - if (extra != null) { - mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra; - } + setCachedParam(params, Engine.KEY_PARAM_STREAM, Engine.PARAM_POSITION_STREAM); + setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID, + Engine.PARAM_POSITION_UTTERANCE_ID); } result = mITts.playEarcon(mPackageName, earcon, queueMode, null); } catch (RemoteException e) { @@ -845,10 +888,8 @@ public class TextToSpeech { } try { if ((params != null) && (!params.isEmpty())) { - String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID); - if (extra != null) { - mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra; - } + setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID, + Engine.PARAM_POSITION_UTTERANCE_ID); } result = mITts.playSilence(mPackageName, durationInMs, queueMode, mCachedParams); } catch (RemoteException e) { @@ -870,6 +911,7 @@ public class TextToSpeech { mStarted = false; initTts(); } finally { + resetCachedParams(); return result; } } @@ -1224,6 +1266,7 @@ public class TextToSpeech { */ public int synthesizeToFile(String text, HashMap<String,String> params, String filename) 
{ + Log.i("TTS", "synthesizeToFile()"); synchronized (mStartLock) { int result = ERROR; if (!mStarted) { @@ -1232,14 +1275,9 @@ public class TextToSpeech { try { if ((params != null) && (!params.isEmpty())) { // no need to read the stream type here - String extra = params.get(Engine.KEY_PARAM_UTTERANCE_ID); - if (extra != null) { - mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID + 1] = extra; - } - extra = params.get(Engine.KEY_PARAM_ENGINE); - if (extra != null) { - mCachedParams[Engine.PARAM_POSITION_ENGINE + 1] = extra; - } + setCachedParam(params, Engine.KEY_PARAM_UTTERANCE_ID, + Engine.PARAM_POSITION_UTTERANCE_ID); + setCachedParam(params, Engine.KEY_PARAM_ENGINE, Engine.PARAM_POSITION_ENGINE); } result = mITts.synthesizeToFile(mPackageName, text, mCachedParams, filename) ? SUCCESS : ERROR; @@ -1277,6 +1315,19 @@ public class TextToSpeech { mCachedParams[Engine.PARAM_POSITION_STREAM + 1] = String.valueOf(Engine.DEFAULT_STREAM); mCachedParams[Engine.PARAM_POSITION_UTTERANCE_ID+ 1] = ""; + mCachedParams[Engine.PARAM_POSITION_VOLUME + 1] = Engine.DEFAULT_VOLUME_STRING; + mCachedParams[Engine.PARAM_POSITION_PAN + 1] = Engine.DEFAULT_PAN_STRING; + } + + /** + * Convenience method to save a parameter in the cached parameter array, at the given index, + * for a property saved in the given hashmap. 
+ */ + private void setCachedParam(HashMap<String,String> params, String key, int keyIndex) { + String extra = params.get(key); + if (extra != null) { + mCachedParams[keyIndex+1] = extra; + } } /** diff --git a/packages/TtsService/jni/android_tts_SynthProxy.cpp b/packages/TtsService/jni/android_tts_SynthProxy.cpp index 8dc88db..27d1fc0 100644 --- a/packages/TtsService/jni/android_tts_SynthProxy.cpp +++ b/packages/TtsService/jni/android_tts_SynthProxy.cpp @@ -17,7 +17,7 @@ #include <stdio.h> #include <unistd.h> -#define LOG_TAG "SynthProxy" +#define LOG_TAG "SynthProxyJNI" #include <utils/Log.h> #include <nativehelper/jni.h> @@ -33,8 +33,8 @@ #define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT #define DEFAULT_TTS_NB_CHANNELS 1 #define DEFAULT_TTS_BUFFERSIZE 2048 -// TODO use the TTS stream type when available #define DEFAULT_TTS_STREAM_TYPE AudioSystem::MUSIC +#define DEFAULT_VOLUME 1.0f // EQ + BOOST parameters #define FILTER_LOWSHELF_ATTENUATION -18.0f // in dB @@ -154,7 +154,7 @@ static Mutex engineMutex; class SynthProxyJniStorage { public : jobject tts_ref; - android_tts_engine_t* mEngine; + android_tts_engine_t* mEngine; void* mEngineLibHandle; AudioTrack* mAudioOut; int8_t mPlayState; @@ -165,6 +165,7 @@ class SynthProxyJniStorage { int mNbChannels; int8_t * mBuffer; size_t mBufferSize; + float mVolume[2]; SynthProxyJniStorage() { tts_ref = NULL; @@ -179,6 +180,8 @@ class SynthProxyJniStorage { mBufferSize = DEFAULT_TTS_BUFFERSIZE; mBuffer = new int8_t[mBufferSize]; memset(mBuffer, 0, mBufferSize); + mVolume[AudioTrack::LEFT] = DEFAULT_VOLUME; + mVolume[AudioTrack::RIGHT] = DEFAULT_VOLUME; } ~SynthProxyJniStorage() { @@ -189,7 +192,7 @@ class SynthProxyJniStorage { mEngine = NULL; } if (mEngineLibHandle) { - //LOGE("~SynthProxyJniStorage(): before close library"); + //LOGV("~SynthProxyJniStorage(): before close library"); int res = dlclose(mEngineLibHandle); LOGE_IF( res != 0, "~SynthProxyJniStorage(): dlclose returned %d", res); } @@ -241,7 +244,7 @@ class 
SynthProxyJniStorage { mAudioOut = NULL; } else { //LOGI("AudioTrack OK"); - mAudioOut->setVolume(1.0f, 1.0f); + mAudioOut->setVolume(mVolume[AudioTrack::LEFT], mVolume[AudioTrack::RIGHT]); LOGV("AudioTrack ready"); } mPlayLock.unlock(); @@ -277,7 +280,7 @@ extern "C" android_tts_callback_status_t __ttsSynthDoneCB(void ** pUserdata, uint32_t rate, android_tts_audio_format_t format, int channel, int8_t **pWav, size_t *pBufferSize, - android_tts_synth_status_t status) + android_tts_synth_status_t status) { //LOGV("ttsSynthDoneCallback: %d bytes", bufferSize); AudioSystem::audio_format encoding; @@ -618,7 +621,7 @@ android_tts_SynthProxy_setSpeechRate(JNIEnv *env, jobject thiz, jint jniData, Mutex::Autolock l(engineMutex); SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; - LOGI("setting speech rate to %d", speechRate); + //LOGI("setting speech rate to %d", speechRate); android_tts_engine_t *engine = pSynthData->mEngine; if (engine) { @@ -647,7 +650,7 @@ android_tts_SynthProxy_setPitch(JNIEnv *env, jobject thiz, jint jniData, sprintf(buffer, "%d", pitch); SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; - LOGI("setting pitch to %d", pitch); + //LOGI("setting pitch to %d", pitch); android_tts_engine_t *engine = pSynthData->mEngine; if (engine) { @@ -783,7 +786,7 @@ android_tts_SynthProxy_synthesizeToFile(JNIEnv *env, jobject thiz, jint jniData, static int android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData, - jstring textJavaString, jint javaStreamType) + jstring textJavaString, jint javaStreamType, jfloat volume, jfloat pan) { int result = ANDROID_TTS_FAILURE; @@ -798,9 +801,34 @@ android_tts_SynthProxy_speak(JNIEnv *env, jobject thiz, jint jniData, SynthProxyJniStorage* pSynthData = (SynthProxyJniStorage*)jniData; - pSynthData->mPlayLock.lock(); - pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING; - pSynthData->mPlayLock.unlock(); + {//scope for lock on mPlayLock + Mutex::Autolock _l(pSynthData->mPlayLock); + + 
pSynthData->mPlayState = SYNTHPLAYSTATE_IS_PLAYING; + + // clip volume and pan + float vol = (volume > 1.0f) ? 1.0f : (volume < 0.0f) ? 0.0f : volume; + float panning = (pan > 1.0f) ? 1.0f : (pan < -1.0f) ? -1.0f : pan; + // compute playback volume based on volume and pan, using balance rule, in order to avoid + // lowering volume when panning in center + pSynthData->mVolume[AudioTrack::LEFT] = vol; + pSynthData->mVolume[AudioTrack::RIGHT] = vol; + if (panning > 0.0f) { + pSynthData->mVolume[AudioTrack::LEFT] *= (1.0f - panning); + } else if (panning < 0.0f) { + pSynthData->mVolume[AudioTrack::RIGHT] *= (1.0f + panning); + } + + // apply the volume if there is an output + if (NULL != pSynthData->mAudioOut) { + pSynthData->mAudioOut->setVolume(pSynthData->mVolume[AudioTrack::LEFT], + pSynthData->mVolume[AudioTrack::RIGHT]); + } + + //LOGV("android_tts_SynthProxy_speak() vol=%.3f pan=%.3f, mVolume=[%.1f %.1f]", + // volume, pan, + // pSynthData->mVolume[AudioTrack::LEFT], pSynthData->mVolume[AudioTrack::RIGHT]); + } afterSynthData_t* pForAfter = new (afterSynthData_t); pForAfter->jniStorage = jniData; @@ -935,7 +963,7 @@ static JNINativeMethod gMethods[] = { (void*)android_tts_SynthProxy_stopSync }, { "native_speak", - "(ILjava/lang/String;I)I", + "(ILjava/lang/String;IFF)I", (void*)android_tts_SynthProxy_speak }, { "native_synthesizeToFile", diff --git a/packages/TtsService/src/android/tts/SynthProxy.java b/packages/TtsService/src/android/tts/SynthProxy.java index 525a504..f5f5fcf 100755 --- a/packages/TtsService/src/android/tts/SynthProxy.java +++ b/packages/TtsService/src/android/tts/SynthProxy.java @@ -78,12 +78,13 @@ public class SynthProxy { /** * Synthesize speech and speak it directly using AudioTrack. 
*/ - public int speak(String text, int streamType) { + public int speak(String text, int streamType, float volume, float pan) { + Log.i(TAG, "speak() on stream "+ streamType); if ((streamType > -1) && (streamType < AudioSystem.getNumStreamTypes())) { - return native_speak(mJniData, text, streamType); + return native_speak(mJniData, text, streamType, volume, pan); } else { Log.e("SynthProxy", "Trying to speak with invalid stream type " + streamType); - return native_speak(mJniData, text, AudioManager.STREAM_MUSIC); + return native_speak(mJniData, text, AudioManager.STREAM_MUSIC, volume, pan); } } @@ -93,6 +94,7 @@ public class SynthProxy { * "/sdcard/???.wav" is recommended. */ public int synthesizeToFile(String text, String filename) { + Log.i(TAG, "synthesizeToFile() to file "+ filename); return native_synthesizeToFile(mJniData, text, filename); } @@ -192,7 +194,8 @@ public class SynthProxy { private native final int native_stopSync(int jniData); - private native final int native_speak(int jniData, String text, int streamType); + private native final int native_speak(int jniData, String text, int streamType, float volume, + float pan); private native final int native_synthesizeToFile(int jniData, String text, String filename); diff --git a/packages/TtsService/src/android/tts/TtsService.java b/packages/TtsService/src/android/tts/TtsService.java index 08bbfb2..c562327 100755 --- a/packages/TtsService/src/android/tts/TtsService.java +++ b/packages/TtsService/src/android/tts/TtsService.java @@ -121,7 +121,6 @@ public class TtsService extends Service implements OnCompletionListener { private static final int SPEECHQUEUELOCK_TIMEOUT = 5000; private static final int MAX_SPEECH_ITEM_CHAR_LENGTH = 4000; private static final int MAX_FILENAME_LENGTH = 250; - // TODO use the TTS stream type when available private static final int DEFAULT_STREAM_TYPE = AudioManager.STREAM_MUSIC; // TODO use TextToSpeech.DEFAULT_SYNTH once it is unhidden private static final String 
DEFAULT_SYNTH = "com.svox.pico"; @@ -791,6 +790,8 @@ public class TtsService extends Service implements OnCompletionListener { String speechRate = ""; String engine = ""; String pitch = ""; + float volume = TextToSpeech.Engine.DEFAULT_VOLUME; + float pan = TextToSpeech.Engine.DEFAULT_PAN; if (speechItem.mParams != null){ for (int i = 0; i < speechItem.mParams.size() - 1; i = i + 2){ String param = speechItem.mParams.get(i); @@ -816,6 +817,18 @@ public class TtsService extends Service implements OnCompletionListener { engine = speechItem.mParams.get(i + 1); } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PITCH)) { pitch = speechItem.mParams.get(i + 1); + } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_VOLUME)) { + try { + volume = Float.parseFloat(speechItem.mParams.get(i + 1)); + } catch (NumberFormatException e) { + volume = TextToSpeech.Engine.DEFAULT_VOLUME; + } + } else if (param.equals(TextToSpeech.Engine.KEY_PARAM_PAN)) { + try { + pan = Float.parseFloat(speechItem.mParams.get(i + 1)); + } catch (NumberFormatException e) { + pan = TextToSpeech.Engine.DEFAULT_PAN; + } } } } @@ -844,7 +857,7 @@ public class TtsService extends Service implements OnCompletionListener { setPitch("", getDefaultPitch()); } try { - sNativeSynth.speak(speechItem.mText, streamType); + sNativeSynth.speak(speechItem.mText, streamType, volume, pan); } catch (NullPointerException e) { // synth will become null during onDestroy() Log.v(SERVICE_TAG, " null synth, can't speak"); |